Python sklearn.neighbors module: KNeighborsRegressor() example source code

The following 46 code examples, extracted from open-source Python projects, illustrate how to use sklearn.neighbors.KNeighborsRegressor().
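
Before the project examples, here is a minimal usage sketch of the estimator itself. The synthetic data, neighbor count, and weighting choice below are illustrative only and are not taken from any of the projects that follow:

import numpy as np
from sklearn.neighbors import KNeighborsRegressor

# Synthetic 1-D regression problem: a noisy sine wave.
rng = np.random.RandomState(0)
X = np.sort(5 * rng.rand(40, 1), axis=0)
y = np.sin(X).ravel() + 0.1 * rng.randn(40)

# Average the 5 nearest training neighbors, weighted by inverse distance.
knn = KNeighborsRegressor(n_neighbors=5, weights='distance')
knn.fit(X, y)

# Query the fitted model on a dense grid of points.
X_test = np.linspace(0, 5, 100)[:, np.newaxis]
y_pred = knn.predict(X_test)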

Project: Parallel-SGD    Author: angadgill    | Project source | File source
def test_regression():
    # Check regression for various parameter settings.
    rng = check_random_state(0)
    X_train, X_test, y_train, y_test = train_test_split(boston.data[:50],
                                                        boston.target[:50],
                                                        random_state=rng)
    grid = ParameterGrid({"max_samples": [0.5, 1.0],
                          "max_features": [0.5, 1.0],
                          "bootstrap": [True, False],
                          "bootstrap_features": [True, False]})

    for base_estimator in [None,
                           DummyRegressor(),
                           DecisionTreeRegressor(),
                           KNeighborsRegressor(),
                           SVR()]:
        for params in grid:
            BaggingRegressor(base_estimator=base_estimator,
                             random_state=rng,
                             **params).fit(X_train, y_train).predict(X_test)
Project: mlens    Author: flennerhag    | Project source | File source
def build_ensemble(**kwargs):
    """Generate ensemble."""

    ens = SuperLearner(**kwargs)
    prep = {'Standard Scaling': [StandardScaler()],
            'Min Max Scaling': [MinMaxScaler()],
            'No Preprocessing': []}

    est = {'Standard Scaling':
               [ElasticNet(), Lasso(), KNeighborsRegressor()],
           'Min Max Scaling':
               [SVR()],
           'No Preprocessing':
               [RandomForestRegressor(random_state=SEED),
                GradientBoostingRegressor()]}

    ens.add(est, prep)

    ens.add(GradientBoostingRegressor(), meta=True)

    return ens
Project: ModelFlow    Author: yuezPrincetechs    | Project source | File source
def fit(self,X,y):
        '''
        Fit one model per feature column: a group-mean lookup table for
        categorical columns and a KNN regressor for numeric columns.
        :param X: feature data, convertible to a DataFrame; one model is fit per column
        :param y: target data, convertible to a Series, aligned row-by-row with X
        :return: self
        '''
        X=pd.DataFrame(X.copy())
        X=X.reset_index(drop=True)
        y=pd.Series(y.copy())
        y=y.reset_index(drop=True)
        self.means=y.mean()
        self.models={}
        for col in X.columns.tolist():
            if col in self.feature_cate:
                self.models[col]=y.groupby(X[col]).mean().to_dict()
            else:
                knn=KNeighborsRegressor(n_neighbors=self.n_neighbors)
                knn.fit(X[[col]],y)
                self.models[col]=copy.deepcopy(knn)
        return self
Project: strategy    Author: kanghua309    | Project source | File source
def model_cross_valid(X,Y):
    seed = 7
    kfold = model_selection.KFold(n_splits=10, random_state=seed)
    def build_model(model_name):
        model = model_name()
        return model
    scoring = 'neg_mean_squared_error'
    # + random forest boost lstm gbdt

    for model_name in [LinearRegression,ElasticNet]:
    #for model_name in [LinearRegression,Ridge,Lasso,ElasticNet,KNeighborsRegressor,DecisionTreeRegressor,SVR,RandomForestRegressor,AdaBoostRegressor,GradientBoostingRegressor]:
        model = build_model(model_name)
        results = model_selection.cross_val_score(model, X, Y, cv=kfold, scoring=scoring)
        print(model_name,results.mean())
Project: AirTicketPredicting    Author: junlulocky    | Project source | File source
def parameterChoosing(self):
        # Set the parameters by cross-validation
        tuned_parameters = [{'weights': ['uniform', 'distance'],
                             'n_neighbors': range(2,100)
                             }
                            ]


        reg = GridSearchCV(neighbors.KNeighborsRegressor(), tuned_parameters, cv=5, scoring='mean_squared_error')
        reg.fit(self.X_train, self.y_train)

        print "Best parameters set found on development set:\n"
        print reg.best_params_

        print "Grid scores on development set:\n"
        for params, mean_score, scores in reg.grid_scores_:
            print "%0.3f (+/-%0.03f) for %r\n" % (mean_score, scores.std() * 2, params)

        print reg.scorer_

        print "MSE for test data set:"
        y_true, y_pred = self.y_test, reg.predict(self.X_test)
        print mean_squared_error(y_pred, y_true)
Project: Parallel-SGD    Author: angadgill    | Project source | File source
def test_kneighbors_regressor(n_samples=40,
                              n_features=5,
                              n_test_pts=10,
                              n_neighbors=3,
                              random_state=0):
    # Test k-neighbors regression
    rng = np.random.RandomState(random_state)
    X = 2 * rng.rand(n_samples, n_features) - 1
    y = np.sqrt((X ** 2).sum(1))
    y /= y.max()

    y_target = y[:n_test_pts]

    weight_func = _weight_func

    for algorithm in ALGORITHMS:
        for weights in ['uniform', 'distance', weight_func]:
            knn = neighbors.KNeighborsRegressor(n_neighbors=n_neighbors,
                                                weights=weights,
                                                algorithm=algorithm)
            knn.fit(X, y)
            epsilon = 1E-5 * (2 * rng.rand(1, n_features) - 1)
            y_pred = knn.predict(X[:n_test_pts] + epsilon)
            assert_true(np.all(abs(y_pred - y_target) < 0.3))
Project: Parallel-SGD    Author: angadgill    | Project source | File source
def test_KNeighborsRegressor_multioutput_uniform_weight():
    # Test k-neighbors in multi-output regression with uniform weight
    rng = check_random_state(0)
    n_features = 5
    n_samples = 40
    n_output = 4

    X = rng.rand(n_samples, n_features)
    y = rng.rand(n_samples, n_output)

    X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)
    for algorithm, weights in product(ALGORITHMS, [None, 'uniform']):
        knn = neighbors.KNeighborsRegressor(weights=weights,
                                            algorithm=algorithm)
        knn.fit(X_train, y_train)

        neigh_idx = knn.kneighbors(X_test, return_distance=False)
        y_pred_idx = np.array([np.mean(y_train[idx], axis=0)
                               for idx in neigh_idx])

        y_pred = knn.predict(X_test)

        assert_equal(y_pred.shape, y_test.shape)
        assert_equal(y_pred_idx.shape, y_test.shape)
        assert_array_almost_equal(y_pred, y_pred_idx)
Project: Parallel-SGD    Author: angadgill    | Project source | File source
def test_kneighbors_regressor_sparse(n_samples=40,
                                     n_features=5,
                                     n_test_pts=10,
                                     n_neighbors=5,
                                     random_state=0):
    # Test k-neighbors regression on sparse matrices
    # Like the above, but with various types of sparse matrices
    rng = np.random.RandomState(random_state)
    X = 2 * rng.rand(n_samples, n_features) - 1
    y = ((X ** 2).sum(axis=1) < .25).astype(int)

    for sparsemat in SPARSE_TYPES:
        knn = neighbors.KNeighborsRegressor(n_neighbors=n_neighbors,
                                            algorithm='auto')
        knn.fit(sparsemat(X), y)
        for sparsev in SPARSE_OR_DENSE:
            X2 = sparsev(X)
            assert_true(np.mean(knn.predict(X2).round() == y) > 0.95)
Project: Parallel-SGD    Author: angadgill    | Project source | File source
def test_neighbors_iris():
    # Sanity checks on the iris dataset
    # Puts three points of each label in the plane and performs a
    # nearest neighbor query on points near the decision boundary.

    for algorithm in ALGORITHMS:
        clf = neighbors.KNeighborsClassifier(n_neighbors=1,
                                             algorithm=algorithm)
        clf.fit(iris.data, iris.target)
        assert_array_equal(clf.predict(iris.data), iris.target)

        clf.set_params(n_neighbors=9, algorithm=algorithm)
        clf.fit(iris.data, iris.target)
        assert_true(np.mean(clf.predict(iris.data) == iris.target) > 0.95)

        rgs = neighbors.KNeighborsRegressor(n_neighbors=5, algorithm=algorithm)
        rgs.fit(iris.data, iris.target)
        assert_greater(np.mean(rgs.predict(iris.data).round() == iris.target),
                       0.95)
Project: HrlPy    Author: snakeztc    | Project source | File source
def learn(self, experiences, max_iter=20):
        # experience is in (s, a, r, ns)
        states = experiences[:, 0:self.domain.state_space_dims]
        actions = experiences[:, self.domain.state_space_dims]
        rewards = experiences[:, self.domain.state_space_dims+1]
        next_states = experiences[:, self.domain.state_space_dims+2:]
        X = self.representation.phi_sa("root", states, actions)

        for i in range(0, max_iter):
            #old_qs = np.reshape(self.representation.Q("root", states, actions), (-1, 1))
            nqs = self.representation.Qs("root", next_states)
            best_nqs = np.reshape(np.amax(nqs, axis=1), (-1, 1))
            # reshape rewards to a column so the addition broadcasts element-wise
            y = np.reshape(rewards, (-1, 1)) + self.domain.discount_factor * best_nqs
            #resd = np.mean(np.abs(y - old_qs))
            model = KNeighborsRegressor(n_neighbors=2, n_jobs=-1)
            model.fit(X, y)
            self.representation.models["root"] = model
            #print "Residual is " + str(resd)
Project: strategy    Author: kanghua309    | Project source | File source
def model_fit_and_test(TrainX,TrainY,TestX,TestY):
    def build_model(model_name):
        model = model_name()
        return model
    #for model_name in [LinearRegression, Ridge, Lasso, ElasticNet, KNeighborsRegressor, DecisionTreeRegressor, SVR,RandomForestRegressor, AdaBoostRegressor, GradientBoostingRegressor]:
    for model_name in [LinearRegression, ElasticNet]:
        model = build_model(model_name)
        model.fit(TrainX,TrainY)
        print(model_name)
        resid = model.predict(TestX) - TestY
        #print resid
        print("Residual sum of squares: %f"% np.mean(resid ** 2))
        #print model.predict(TestX)
        #print TestY
        # Explained variance score: 1 is perfect prediction
        plt.scatter(model.predict(TestX), resid);
        plt.axhline(0, color='red')
        plt.xlabel('Predicted Values')
        plt.ylabel('Residuals')
        #plt.xlim([1, 50])
        plt.show()

        print('Variance score: %.2f' % model.score(TestX, TestY))

        from statsmodels.stats.stattools import jarque_bera
        _, pvalue, _, _ = jarque_bera(resid)
        print ("Test Residuals Normal", pvalue)

        from statsmodels import regression, stats
        import statsmodels.api as sms
        import statsmodels.stats.diagnostic as smd
        # xs_with_constant = sms.add_constant(np.column_stack((X1,X2,X3,X4)))
        xs_with_constant = sms.add_constant(TestX)
        _, pvalue1, _, _ = stats.diagnostic.het_breuschpagan(resid, xs_with_constant)
        print("Test Heteroskedasticity", pvalue1)
        ljung_box = smd.acorr_ljungbox(resid, lags=10)

        #print "Lagrange Multiplier Statistics:", ljung_box[0]
        print "Test Autocorrelation P-values:", ljung_box[1]
        if any(ljung_box[1] < 0.05):
            print "The residuals are autocorrelated."
        else:
            print "The residuals are not autocorrelated."
Project: AirTicketPredicting    Author: junlulocky    | Project source | File source
def __init__(self, isTrain):
        super(RegressionKNN, self).__init__(isTrain)
        # data preprocessing
        #self.dataPreprocessing()

        # Create KNN regression object
        # first parameter is the K neighbors
        # 'uniform' assigns uniform weights to each neighbor
        # 'distance' assigns weights proportional to the inverse of the distance from the query point
        # default metric is euclidean distance
        self.regr = neighbors.KNeighborsRegressor(86, weights='distance')
Project: smp_base    Author: x75    | Project source | File source
def __init__(self, conf):
        """smpKNN.__init__

        init
        """
        smpModel.__init__(self, conf)

        self.fwd = KNeighborsRegressor(n_neighbors = self.n_neighbors)

        self.X_ = []
        self.y_ = []

        self.bootstrap()
Project: coursera-machine-learning-yandex    Author: dstarcev    | Project source | File source
def calculate(X, y):
    best_p, best_score = 0, -float('inf')
    kf = KFold(len(y), n_folds=5, shuffle=True, random_state=42)
    for p in numpy.linspace(1, 10, num=200):
        knr = KNeighborsRegressor(n_neighbors=5, weights='distance', p=p)
        score = max(cross_val_score(knr, X, y, cv=kf, scoring='mean_squared_error'))
        if score > best_score:
            best_score = score
            best_p = p

    return best_p, best_score
Project: stock_prediction    Author: vishwajeetv    | Project source | File source
def knnPredictor(df):

    dataTrainX, dataTrainY, dataTestX, dataTestY = sample(df)
    corelationCoefficiantDictionary = {}
    corelationCoefficiantArray = []

    for k in range(1, 200):
        knnModel = KNeighborsRegressor(n_neighbors=k)

        knnModel.fit(dataTrainX, dataTrainY)

        knnpredicted = knnModel.predict(dataTestX)
        corelationCoefficient = pearsonr(dataTestY, knnpredicted)
        corelationCoefficiantDictionary[k] = corelationCoefficient[0]
        corelationCoefficiantArray.append(corelationCoefficient[0])

    # plotter.plot(corelationCoefficiantArray)
    bestK = max(corelationCoefficiantDictionary, key=corelationCoefficiantDictionary.get)

    knnModelBest = KNeighborsRegressor(n_neighbors=bestK)
    knnModelBest.fit(dataTrainX, dataTrainY)
    print("K = ")
    print(bestK)
    print("Corelation Coeff:")
    print(corelationCoefficiantDictionary[bestK])

    knnpredictedBest = knnModelBest.predict(dataTestX)

    fig, ax = plotter.subplots()
    corelationCoefficient = pearsonr(dataTestY, knnpredictedBest)
    print(corelationCoefficient[0])
    ax.set_ylabel('Predicted KNN Weekly')
    ax.scatter(dataTestY, knnpredictedBest)
    ax.set_xlabel('Measured')
    plotter.show()
Project: stock_prediction    Author: vishwajeetv    | Project source | File source
def predictKnn(data, priceToPredict):
    corelationCoefficiantDictionary = {}
    corelationCoefficiantArray = []
    openingPriceTrain, openingPriceTest, closingPriceTrain, closingPriceTest = \
        data["openingPriceTrain"], data["openingPriceTest"], data["closingPriceTrain"], data["closingPriceTest"]

    for k in range(1, 100):
        neigh = KNeighborsRegressor(n_neighbors=k)
        #n = 7 best fits
        neigh.fit(openingPriceTrain, closingPriceTrain)

        closingPriceTestArray = np.reshape(closingPriceTest,-1)
        knnpr = neigh.predict(openingPriceTest)
        predictedArray = np.reshape(knnpr,-1)

        corelationCoefficient = pearsonr(closingPriceTestArray,predictedArray)
        corelationCoefficiantDictionary[k] = corelationCoefficient[0]
        corelationCoefficiantArray.append(corelationCoefficient[0])
    plotter.plot(corelationCoefficiantArray)
    # plotter.show()

    bestK = max(corelationCoefficiantDictionary, key=corelationCoefficiantDictionary.get)
    neighBest = KNeighborsRegressor(n_neighbors=bestK)
    neighBest.fit(openingPriceTrain, closingPriceTrain)
    openingPriceToPredict = np.array([priceToPredict])
    print("K = ")
    print(bestK)
    print(neighBest.predict(openingPriceToPredict))
Project: mlens    Author: flennerhag    | Project source | File source
def build_ensemble(**kwargs):
    """Generate ensemble."""

    ens = SuperLearner(**kwargs)

    est = [ElasticNet(copy_X=False),
           Lasso(copy_X=False)]

    ens.add(est)
    ens.add(KNeighborsRegressor())

    return ens
Project: mlens    Author: flennerhag    | Project source | File source
def knn():
    """Fit KNN."""
    print("Fitting KNN...", end=" ", flush=True)
    time.sleep(SLEEP)
    t0 = time.time()
    knn = KNeighborsRegressor()
    knn.fit(X, y)
    print_time(t0, "Done", end="")
Project: Photometric-Redshifts    Author: martiansideofthemoon    | Project source | File source
def knn_regression(K, training_data, labels, test_data, weights='distance'):
    knn = neighbors.KNeighborsRegressor(K, weights=weights)
    output = knn.fit(training_data, labels).predict(test_data)
    return output
Project: ML-Predictions    Author: ltfschoen    | Project source | File source
def generate_model(self, regressor, qty_neighbors, algorithm, distance_type):
        """ Regressor Model Generation"""
        if regressor == "knn":
            return KNeighborsRegressor(n_neighbors=qty_neighbors, algorithm=algorithm, p=distance_type)
        elif regressor == "linear":
            return LinearRegression(fit_intercept=True) # copy_X=True, n_jobs=1, normalize=False
        elif regressor == "logistic":
            return LogisticRegression(class_weight='balanced')
Project: eezzy    Author: 3Blades    | Project source | File source
def spot_check(X, y, task='regression'):
    if task == 'regression':
        models = [
            (LinearRegression(), 'Ordinary Least Squares'),
            (Ridge(alpha=0.1), 'Ridge (alpha 0.1)'),
            (Ridge(), 'Ridge (alpha 1.0)'),
            (Lasso(alpha=0.1), 'Lasso (alpha 0.1)'),
            (Lasso(), 'Lasso (alpha 1.0)'),
            (ElasticNet(alpha=0.1), 'ElasticNet (alpha 0.1)'),
            (ElasticNet(), 'ElasticNet (alpha 1.0)'),
            (DecisionTreeRegressor(), 'Decision Tree'),
            (KNeighborsRegressor(), 'K-Nearest Neighbors'),

#           (RandomForestRegressor(), 'Random Forest Regressor'),
#           (BaggingRegressor(), 'Bagging Regressor'),
#           (GradientBoostingRegressor(), 'Gradient Boosted Regression'),
#           (SVR(), 'Support Vector Regression')
        ]

    splits = 5
    scores = []

    for model, model_name in models:
        score = check_model(model, splits, X, y)
        # get average score
        scores.append(score)

    model_names = map(lambda x: x[1], models)
    for name, score in zip(model_names, scores):
        print('%s: %f' % (name, score))
Project: poormining    Author: bowenpay    | Project source | File source
def get_classifier(self, X, Y):
        """ ????????
        :param X: ????
        :param Y: ??????
        :return: ??
        """

        clf = KNeighborsRegressor(weights='uniform')
        clf.fit(X, Y)
        return clf
Project: KerasRL    Author: aejax    | Project source | File source
def __init__(self, S, A, n_neighbors=5, weights='uniform', algorithm='auto', metric='minkowski', memory_fit=100, memory_size=100, **kwargs):
        #assert self.lr_mode == 'constant', 'KNNQ is only compatible with constant learning rates.'
        self.S = S
        self.A = A
        self.states = deque([])
        self.targets = deque([])
        self.memory_fit = memory_fit
        self.memory_size = memory_size
        self.count = 0

        self.neigh = KNeighborsRegressor(n_neighbors=n_neighbors, weights=weights, algorithm=algorithm, metric=metric)

        super(KNNQ, self).__init__(**kwargs)
        self.update_mode = 'set'
Project: binet    Author: crisjf    | Project source | File source
def calculatepRCA(data, y='year', c='ccode', p='pcode', x='x'):
    '''
    Returns the pRCA from data. pRCA is the probability that (RCA_{y+1} > 1) given the volume of exports (x_{cpy}),
    and the 'baseline term' (\sum_c x_{cpy}  \sum_p x_{cpy} / \sum_c \sum_p x_{cpy}).
    It is computed using k-nearest neighbors, in the space of log exports and log baseline term.
    Parameters
    ----------
    data : pandas.DataFrame
        Raw data. It has source,target,volume (trade, number of people etc.).
    y,c,p,x : str (optional)
        Labels of the columns in data used for year, source, target, and volume
    Returns
    -------
    RCA : pandas.DataFrame
        Table with the RCAs, with the columns c,p,x,RCA
        If shares is True it also includes:
            s_c : Share of X_cp over X_c
            s_p : Share of X_cp over X_p
    '''
    df = calculateRCA_by_year(data, y=y, c=c, p=p, x=x, log_terms=True)

    #Compute (RCA > 1) next year and merge it
    df_ = df.copy()
    df_['year'] = df_['year'] - 1
    df_['RCA_y+1'] = (df_['log(RCA)'] > 0).astype(int)
    df_ = df_[['year','ccode','pcode','RCA_y+1']]
    df = df.merge(df_)

    #Prepare dataset for knn and fit
    M = df[['log(x)','T','RCA_y+1']].values
    X, y = M[:,:2], M[:, 2] 
    knn = neighbors.KNeighborsRegressor(n_neighbors = 200, weights = 'uniform').fit(X, y)

    #To avoid memory error, compute predictions in split X. Predictions are output pRCA
    pRCA = np.array([])
    for x in np.array_split(X, 10):
        pRCA = np.append(pRCA, knn.predict(x))
    df['pRCA'] = pRCA

    return df
Project: actinf    Author: x75    | Project source | File source
def __init__(self, idim = 1, odim = 1):
        self.fwd = KNeighborsRegressor(n_neighbors=5)
        ActInfModel.__init__(self, idim, odim)

        self.X_ = []
        self.y_ = []

        self.bootstrap()
Project: stacking    Author: ikki407    | Project source | File source
def build_model(self):
            return KNeighborsRegressor(**self.params)
Project: ML-note    Author: JasonK93    | Project source | File source
def test_KNeighborsRegressor(*data):
    '''
    test the KNN regressor
    :param data: train_data, test_data, train_value, test_value
    :return: None
    '''
    X_train,X_test,y_train,y_test=data
    regr=neighbors.KNeighborsRegressor()
    regr.fit(X_train,y_train)
    print("Training Score:{0}".format(regr.score(X_train,y_train)))
    print("Testing Score:{0}".format(regr.score(X_test,y_test)))
Project: ML-note    Author: JasonK93    | Project source | File source
def test_KNeighborsRegressor_k_w(*data):
    '''
    test the performance with different n_neighbors and weights
    :param data: train_data, test_data, train_value, test_value
    :return: None
    '''
    X_train,X_test,y_train,y_test=data
    Ks=np.linspace(1,y_train.size,num=100,endpoint=False,dtype='int')
    weights=['uniform','distance']

    fig=plt.figure()
    ax=fig.add_subplot(1,1,1)
    ### graph
    for weight in weights:
        training_scores=[]
        testing_scores=[]
        for K in Ks:
            regr=neighbors.KNeighborsRegressor(weights=weight,n_neighbors=K)
            regr.fit(X_train,y_train)
            testing_scores.append(regr.score(X_test,y_test))
            training_scores.append(regr.score(X_train,y_train))
        ax.plot(Ks,testing_scores,label="testing score:weight={0}".format(weight))
        ax.plot(Ks,training_scores,label="training score:weight={0}".format(weight))
    ax.legend(loc='best')
    ax.set_xlabel("K")
    ax.set_ylabel("score")
    ax.set_ylim(0,1.05)
    ax.set_title("KNeighborsRegressor")
    plt.show()
Project: ML-note    Author: JasonK93    | Project source | File source
def test_KNeighborsRegressor_k_p(*data):
    '''
    test the performance with different n_neighbors and p
    :param data: train_data, test_data, train_value, test_value
    :return: None
    '''
    X_train,X_test,y_train,y_test=data
    Ks=np.linspace(1,y_train.size,endpoint=False,dtype='int')
    Ps=[1,2,10]

    fig=plt.figure()
    ax=fig.add_subplot(1,1,1)
    ### graph
    for P in Ps:
        training_scores=[]
        testing_scores=[]
        for K in Ks:
            regr=neighbors.KNeighborsRegressor(p=P,n_neighbors=K)
            regr.fit(X_train,y_train)
            testing_scores.append(regr.score(X_test,y_test))
            training_scores.append(regr.score(X_train,y_train))
        ax.plot(Ks,testing_scores,label="testing score:p={0}".format(P))
        ax.plot(Ks,training_scores,label="training score:p={0}".format(P))
    ax.legend(loc='best')
    ax.set_xlabel("K")
    ax.set_ylabel("score")
    ax.set_ylim(0,1.05)
    ax.set_title("KNeighborsRegressor")
    plt.show()
Project: Black-Swan    Author: 12190143    | Project source | File source
def knn(train_sample, validation_sample, features, seed):
    log_base = np.e
    knn_est = KNeighborsRegressor(n_neighbors=1, weights='distance', algorithm='auto', leaf_size=30,
                                  p=1).fit(
        train_sample[features], np.log1p(train_sample['volume']) / np.log(log_base))
    knn_prob = np.power(log_base, knn_est.predict(validation_sample[features])) - 1
    print_mape(validation_sample['volume'], knn_prob, 'KNN')
    return knn_prob
Project: Black-Swan    Author: 12190143    | Project source | File source
def main():
    df_train0 = pd.read_csv("train.csv")
    df_train1 = pd.read_csv("train1.csv")
    df_train2 = pd.read_csv("train2.csv")
    df_train3 = pd.read_csv("train3.csv")
    df_train_list = [df_train0]
    df_train = pd.concat(df_train_list)
    len_train = len(df_train)

    df_test = pd.read_csv("test2.csv")
    df_train = df_train.append(df_test)[df_train.columns.tolist()]

    df_date = pd.read_csv("date.csv")
    df_ts = pd.read_csv("ts_feature2_simple.csv")

    print(df_test.head())
    df_train = df_train.merge(df_date, on="date", how="left")

    df_train = df_train.merge(df_ts, on=["tollgate_id", "hour", "miniute", "direction"], how="left")

    data = pd.DataFrame.reset_index(df_train)
    data = data.drop("index", axis=1)
    print(data.head(1))

    data = feature_transform_knn(key=1, data=data)

    y = data.iloc[:len_train]["volume"]
    x = data.iloc[:len_train, 8:]
    x1 = data.iloc[len_train:, 8:]

    regressor_cubic = KNeighborsRegressor(n_neighbors=15)
    regressor_cubic.fit(x, y)

    yhat = regressor_cubic.predict(x1)

    df_test["volume"] = yhat
    df_test = df_test[['tollgate_id', 'time_window', 'direction', 'volume']]
    df_test.to_csv("result/result_knn_"+str(np.mean(yhat))+".csv", index=False)

    print(np.mean(yhat))
Project: Parallel-SGD    Author: angadgill    | Project source | File source
def test_precomputed_cross_validation():
    # Ensure array is split correctly
    rng = np.random.RandomState(0)
    X = rng.rand(20, 2)
    D = pairwise_distances(X, metric='euclidean')
    y = rng.randint(3, size=20)
    for Est in (neighbors.KNeighborsClassifier,
                neighbors.RadiusNeighborsClassifier,
                neighbors.KNeighborsRegressor,
                neighbors.RadiusNeighborsRegressor):
        metric_score = cross_val_score(Est(), X, y)
        precomp_score = cross_val_score(Est(metric='precomputed'), D, y)
        assert_array_equal(metric_score, precomp_score)
Project: Parallel-SGD    Author: angadgill    | Project source | File source
def test_neighbors_regressors_zero_distance():
    # Test radius-based regressor, when distance to a sample is zero.

    X = np.array([[1.0, 1.0], [1.0, 1.0], [2.0, 2.0], [2.5, 2.5]])
    y = np.array([1.0, 1.5, 2.0, 0.0])
    radius = 0.2
    z = np.array([[1.1, 1.1], [2.0, 2.0]])

    rnn_correct_labels = np.array([1.25, 2.0])

    knn_correct_unif = np.array([1.25, 1.0])
    knn_correct_dist = np.array([1.25, 2.0])

    for algorithm in ALGORITHMS:
        # we don't test for weights=_weight_func since user will be expected
        # to handle zero distances themselves in the function.
        for weights in ['uniform', 'distance']:
            rnn = neighbors.RadiusNeighborsRegressor(radius=radius,
                                                     weights=weights,
                                                     algorithm=algorithm)
            rnn.fit(X, y)
            assert_array_almost_equal(rnn_correct_labels, rnn.predict(z))

        for weights, corr_labels in zip(['uniform', 'distance'],
                                        [knn_correct_unif, knn_correct_dist]):
            knn = neighbors.KNeighborsRegressor(n_neighbors=2,
                                                weights=weights,
                                                algorithm=algorithm)
            knn.fit(X, y)
            assert_array_almost_equal(corr_labels, knn.predict(z))
Project: Parallel-SGD    Author: angadgill    | Project source | File source
def test_predict_sparse_ball_kd_tree():
    rng = np.random.RandomState(0)
    X = rng.rand(5, 5)
    y = rng.randint(0, 2, 5)
    nbrs1 = neighbors.KNeighborsClassifier(1, algorithm='kd_tree')
    nbrs2 = neighbors.KNeighborsRegressor(1, algorithm='ball_tree')
    for model in [nbrs1, nbrs2]:
        model.fit(X, y)
        assert_raises(ValueError, model.predict, csr_matrix(X))
Project: GZ_travelTime    Author: zhilonglu    | Project source | File source
def get_model_list():
    model_list, name_list = [], []

    # model_list.append(linear_model.LinearRegression())
    # name_list.append('LR')

    # model_list.append(gaussian_process.GaussianProcessRegressor(alpha=1e-10))
    # name_list.append('GaussianProcess')

    # model_list.append(KNeighborsRegressor(weights = 'uniform',n_neighbors=28))
    # name_list.append('KNN_unif')
    #
    # model_list.append(KNeighborsRegressor(weights = 'distance',n_neighbors=28))
    # name_list.append('KNN_dist')
    #
    # model_list.append(SVR(kernel = 'poly', C = 1, gamma = 'auto', coef0 = 0, degree = 2))
    # name_list.append('SVR_poly')
    # #
    model_list.append(SVR(kernel = 'rbf', C = 0.3, gamma = 'auto'))
    name_list.append('SVR_rbf')
    # #
    # model_list.append(DecisionTreeRegressor())
    # name_list.append('DT')
    #
    # model_list.append(RandomForestRegressor(n_estimators=150, max_depth=None,min_samples_split=2, random_state=0))
    # name_list.append('RF')
    #
    # model_list.append(ExtraTreesRegressor(n_estimators=150, max_depth=None, max_features='auto', min_samples_split=2, random_state=0))
    # name_list.append('ET')

    return model_list,name_list

#MAPE
Project: Tback    Author: ideaplat    | Project source | File source
def get_classifier(self, X, Y):
        """ ????????
        :param X: ????
        :param Y: ??????
        :return: ??
        """

        clf = KNeighborsRegressor(weights='uniform')
        clf.fit(X, Y)
        return clf
Project: predictive_imputer    Author: log0ymxm    | Project source | File source
def fit(self, X, y=None, **kwargs):
        X = check_array(X, dtype=np.float64, force_all_finite=False)

        X_nan = np.isnan(X)
        most_by_nan = X_nan.sum(axis=0).argsort()[::-1]

        imputed = self.initial_imputer.fit_transform(X)
        new_imputed = imputed.copy()

        self.statistics_ = np.ma.getdata(X)
        self.gamma_ = []

        if self.f_model == "RandomForest":
            self.estimators_ = [RandomForestRegressor(n_estimators=50, n_jobs=-1, random_state=i, **kwargs) for i in range(X.shape[1])]
        elif self.f_model == "KNN":
            self.estimators_ = [KNeighborsRegressor(n_neighbors=min(5, sum(~X_nan[:, i])), **kwargs) for i in range(X.shape[1])]
        elif self.f_model == "PCA":
            self.estimators_ = [PCA(n_components=int(np.sqrt(min(X.shape))), whiten=True, **kwargs)]

        for iter in range(self.max_iter):
            if len(self.estimators_) > 1:
                for i in most_by_nan:

                    X_s = np.delete(new_imputed, i, 1)
                    y_nan = X_nan[:, i]

                    X_train = X_s[~y_nan]
                    y_train = new_imputed[~y_nan, i]
                    X_unk = X_s[y_nan]

                    estimator_ = self.estimators_[i]
                    estimator_.fit(X_train, y_train)
                    if len(X_unk) > 0:
                        new_imputed[y_nan, i] = estimator_.predict(X_unk)

            else:
                estimator_ = self.estimators_[0]
                estimator_.fit(new_imputed)
                new_imputed[X_nan] = estimator_.inverse_transform(estimator_.transform(new_imputed))[X_nan]

            gamma = ((new_imputed-imputed)**2/(1e-6+new_imputed.var(axis=0))).sum()/(1e-6+X_nan.sum())
            self.gamma_.append(gamma)
            if len(self.gamma_) > 1 and abs(self.gamma_[-1] - self.gamma_[-2]) < self.tol:
                break

        return self
Project: Smart-Trader    Author: i-sultan    | Project source | File source
def train_validate(self, df, validation_range, update_progress):
        """ Train and validate regressor on df samples with indices listed in validation_range. """
        training_summary = pd.DataFrame()
        first_sample, samples, labels = prepare_samples(df, self.indicators_samples)

        # progress bar parameters
        total_steps = len(self.model_params['sample_presentation']) * \
                      len(self.model_params['exp_weight']) * len(self.model_params['k'])
        completed_steps = 0

        # loop over model parameters
        for sample_presentation in self.model_params['sample_presentation']:
            presented_samples, presented_labels, normalizer = set_presentation(samples, labels, sample_presentation, self.indicators_samples['Daily'])

            for exp_weight in self.model_params['exp_weight']:
                weighted_samples = apply_exp_weights(presented_samples, exp_weight)

                for k in self.model_params['k']:
                    model, total_train_time, total_test_time = [[0 for i in range (len(h))] for j in range(3)]
                    error_list, relative_error_list, hit_list = [[[] for i in range (len(h))] for j in range(3)]
                    params = (sample_presentation, exp_weight, k)

                    # model training and validation core
                    for h_index in range(len(h)):
                        for index in validation_range:
                            i = index-first_sample                        
                            x_train, x_validate = weighted_samples[:i-h[h_index]+1,:], weighted_samples[i,:] #need to stop training h steps before test
                            y_train, y_validate = presented_labels[h_index][:i-h[h_index]+1], presented_labels[h_index][i]
                            #train
                            t1 = time.time()
                            model[h_index] = KNeighborsRegressor(n_neighbors=k) # train a separate model for each horizon
                            model[h_index].fit(x_train, y_train)
                            t2 = time.time()
                            train_time = (t2-t1)
                            #test
                            y_predict = model[h_index].predict(x_validate.reshape(1,-1))
                            test_time = (time.time()-t2)
                            # append new results
                            y_validate_absolute = remove_presentation(y_validate,normalizer[i], sample_presentation)
                            y_predict_absolute  = remove_presentation(y_predict ,normalizer[i], sample_presentation)
                            error_list[h_index] += [y_validate_absolute - y_predict_absolute]
                            relative_error_list[h_index] += [(y_validate_absolute - y_predict_absolute)/y_validate_absolute]
                            hit_list[h_index] += [(y_validate-x_validate[-1])*(y_predict-x_validate[-1]) > 0]

                            total_train_time[h_index] += train_time
                            total_test_time[h_index] += test_time
                            if i == len(presented_labels[h_index])-1:
                                # very last training point; include the last training opportunity
                                x_train = weighted_samples[:i+1,:]
                                y_train = presented_labels[h_index][:i+1]
                                model[h_index].fit(x_train, y_train)
                                break

                    completed_steps += 1
                    update_progress(100.0 * completed_steps/total_steps)

                    #save last trained model, and add to training summary
                    training_summary = training_summary.append(summarize(self, model, error_list, relative_error_list, hit_list, 
                                                                        params, total_train_time, total_test_time))
        return training_summary, make_presentable(training_summary, self.summary_name)
Project: AirTicketPredicting    Author: junlulocky    | Project source | File source
def __init__(self, isTrain):
        super(RegressionUniformBlending, self).__init__(isTrain)
        # data preprocessing
        #self.dataPreprocessing()

        self.net1 = NeuralNet(
                        layers=[  # three layers: one hidden layer
                            ('input', layers.InputLayer),
                            ('hidden', layers.DenseLayer),
                            #('hidden2', layers.DenseLayer),
                            #('hidden3', layers.DenseLayer),
                            ('output', layers.DenseLayer),
                            ],
                        # layer parameters:
                        input_shape=(None, 13),  # input dimension is 13
                        hidden_num_units=6,  # number of units in hidden layer
                        #hidden2_num_units=8,  # number of units in hidden layer
                        #hidden3_num_units=4,  # number of units in hidden layer
                        output_nonlinearity=None,  # linear output (no nonlinearity) for regression
                        output_num_units=1,  # output dimension is 1

                        # objective function
                        objective_loss_function = lasagne.objectives.squared_error,

                        # optimization method:
                        update=lasagne.updates.nesterov_momentum,
                        update_learning_rate=0.002,
                        update_momentum=0.4,

                        # use 20% as validation
                        train_split=TrainSplit(eval_size=0.2),

                        regression=True,  # flag to indicate we're dealing with regression problem
                        max_epochs=100,  # we want to train this many epochs
                        verbose=0,
                        )

        # Create linear regression object
        self.linRegr = linear_model.LinearRegression()

        # Create KNN regression object
        self.knn = neighbors.KNeighborsRegressor(86, weights='distance')

        # Create Decision Tree regression object
        self.decisionTree = DecisionTreeRegressor(max_depth=7, max_features=None)

        # Create AdaBoost regression object
        decisionReg = DecisionTreeRegressor(max_depth=10)
        rng = np.random.RandomState(1)
        self.adaReg = AdaBoostRegressor(decisionReg,
                          n_estimators=400,
                          random_state=rng)

        # Create random forest regression object
        self.model = RandomForestRegressor(max_features='sqrt', n_estimators=32, max_depth=39)
Project: KerasRL    Author: aejax    | Project source | File source
def __init__(self, S, A, maxlen=1000, mode=None, embedding_dim=1, **kwargs):
        super(TableQ2, self).__init__(**kwargs)
        self.S = S
        self.A = A

        if mode is None:
            if type(S) == type(A) == gym.spaces.Discrete:
                mode = 'array'
            elif type(A) == gym.spaces.Discrete:
                mode = 'dictionary'
        self.mode = mode
        self.maxlen = maxlen
        self.embedding_dim = embedding_dim

        if self.mode == 'array':
            s_dim = get_space_dim(S)
            a_dim = get_space_dim(A)
            self.table = np.zeros((s_dim, a_dim))
            self.maxlen = s_dim
        elif self.mode == 'dictionary':
            self.table = {0: np.zeros(self.A.n)}
        elif self.mode == 'tables':
            self.k = 4
            self.neigh = KNeighborsRegressor(n_neighbors=self.k)
            self.states = np.zeros((self.maxlen,self.embedding_dim))
            self.values = np.zeros((self.maxlen, self.A.n))
            self.recency= np.zeros((self.maxlen,))
            self.i = 0
        elif self.mode == 'action_tables':
            #self.states = []
            #self.recency= []
            self.k = 4
            self.action_tables = [ [[],[], KNeighborsRegressor(n_neighbors=self.k), []]
                                   for _ in range(self.A.n)]
            """
            for at in self.action_tables:
                states, values, neigh, recency = at
                for _ in xrange(self.k):
                    if self.embedding_dim > 1:
                        states.append(np.ones(self.embedding_dim))
                    else:
                        states.append(1)
                    values.append(0)
                    recency.append(0)
                #print states, values
                #neigh.fit(np.array(states), np.array(values))
                s = self._list_to_sklearn(states)
                v = self._list_to_sklearn(values)
                #print s, v
                neigh.fit(s, v)
            """
        else:
            raise NotImplementedError('Sorry, TableQ only supports four modes.')
Project: neoSBM    Author: piratepeel    | Project source | File source
def plot(k=1,xyzFile='xyz_synth_surf.txt',write=False):
    with open(xyzFile) as f:
        xyz=np.float64([row.split() for row in f.readlines()])

    #~ plt.figure()
    #~ plt.scatter(xyz[:, 0], xyz[:, 1], c=xyz[:,2])
    #~ plt.plot(xyz[:3, 0], xyz[:3, 1], c='k', marker='s',ms=10)
    #~ plt.plot(xyz[:50, 0], xyz[:50, 1], xyz[:50,2], c='k', marker='s',ms=3)

    fig=plt.figure()
    ax = fig.add_subplot(111, projection='3d')
    ax.scatter(xyz[:, 0], xyz[:, 1], xyz[:,2], c=xyz[:,2], marker='o',linewidths=0)
    ax.plot(xyz[:50, 0], xyz[:50, 1], xyz[:50,2], c='k', marker='s',ms=3)
    #~ ax.scatter(xyz[:50, 0], xyz[:50, 1], xyz[:50,2], c='k', marker='s',linewidths=0,cmap=plt.cm.bone)

    xmin=np.min(xyz[:,0])
    xmax=np.max(xyz[:,0])
    step=(xmax-xmin)/100.

    x_=np.arange(np.min(xyz[:,0]),np.max(xyz[:,0]),step)
    y_=np.arange(np.min(xyz[:,0]),np.max(xyz[:,0]),step)
    xx,yy=np.meshgrid(x_,y_)

    xy=np.append(xx.ravel()[:,np.newaxis],yy.ravel()[:,np.newaxis],1)

    knn = neighbors.KNeighborsRegressor(k, weights='distance',p=1)
    z_= knn.fit(xyz[:,:2],xyz[:,2]).predict(xy)


    fig=plt.figure()
    ax = fig.add_subplot(111, projection='3d')
    ax.plot_surface(xx, yy, z_.reshape(np.shape(xx)),rstride=1, cstride=1, cmap=plt.cm.spectral,
                       linewidth=0, antialiased=False)
    ax.plot(xyz[:50, 0], xyz[:50, 1], xyz[:50,2], c='k', marker='s',ms=3)
    if write:
        with open('knn_'+ xyzFile,'w') as f:
            for xi,yi,zi in zip(xx.ravel(),yy.ravel(),z_):
                f.write('%f %f %f\n' % (xi,yi,zi))




#################################################################
#calculate a distance matrix based on variation of information
Project: Black-Swan    Author: 12190143    | Project source | File source
def main():
    df_test = pd.read_csv("test2.csv")

    df_train0 = pd.read_csv("train.csv")

    df_train_list = [df_train0,]
    random.shuffle(df_train_list)
    df_train = pd.concat(df_train_list)

    df_ts = pd.read_csv("ts_feature2_simple.csv")
    df_date = pd.read_csv("date.csv")

    df_train = df_train.merge(df_date, on="date", how="left")
    df_train = df_train.merge(df_ts, on=["tollgate_id", "hour", "miniute", "direction"], how="left")

    df_test = df_test.merge(df_date, on="date", how="left")
    df_test = df_test.merge(df_ts, on=["tollgate_id", "hour", "miniute", "direction"], how="left")

    df_train_grouped = df_train.groupby(["tollgate_id", "direction"])
    df_test_grouped = df_test.groupby(["tollgate_id", "direction"])
    result = []
    oob = []
    for key, train_data in df_train_grouped:

        test_data = df_test_grouped.get_group(key)
        len_train = len(train_data)
        train_data = train_data.append(test_data)[train_data.columns.tolist()]


        train_data = feature_transform_knn(key, train_data)

        regressor_cubic = KNeighborsRegressor(n_neighbors=8, algorithm="auto")

        train_data = pd.DataFrame.reset_index(train_data)
        train_data = train_data.drop("index", axis=1)
        y = train_data.iloc[:len_train]["volume"]


        x = train_data.iloc[:len_train, 8:]
        print(x.head())
        x1 = train_data.iloc[len_train:, 8:]

        regressor_cubic.fit(x, y)
        yhat = regressor_cubic.predict(x1)

        test_data["volume"] = yhat
        result.append(test_data[['tollgate_id', 'time_window', 'direction', 'volume']])


    df_result = pd.concat(result, axis=0)
    print(np.mean(df_result["volume"]))
    df_result.to_csv("result/result_split_knn"+str(np.mean(df_result["volume"]))+".csv", index=False)

    print(np.mean(oob))
Project: Parallel-SGD    Author: angadgill    | Project source | File source
def test_precomputed(random_state=42):
    """Tests unsupervised NearestNeighbors with a distance matrix."""
    # Note: smaller samples may result in spurious test success
    rng = np.random.RandomState(random_state)
    X = rng.random_sample((10, 4))
    Y = rng.random_sample((3, 4))
    DXX = metrics.pairwise_distances(X, metric='euclidean')
    DYX = metrics.pairwise_distances(Y, X, metric='euclidean')
    for method in ['kneighbors']:
        # TODO: also test radius_neighbors, but requires different assertion

        # As a feature matrix (n_samples by n_features)
        nbrs_X = neighbors.NearestNeighbors(n_neighbors=3)
        nbrs_X.fit(X)
        dist_X, ind_X = getattr(nbrs_X, method)(Y)

        # As a dense distance matrix (n_samples by n_samples)
        nbrs_D = neighbors.NearestNeighbors(n_neighbors=3, algorithm='brute',
                                            metric='precomputed')
        nbrs_D.fit(DXX)
        dist_D, ind_D = getattr(nbrs_D, method)(DYX)
        assert_array_almost_equal(dist_X, dist_D)
        assert_array_almost_equal(ind_X, ind_D)

        # Check auto works too
        nbrs_D = neighbors.NearestNeighbors(n_neighbors=3, algorithm='auto',
                                            metric='precomputed')
        nbrs_D.fit(DXX)
        dist_D, ind_D = getattr(nbrs_D, method)(DYX)
        assert_array_almost_equal(dist_X, dist_D)
        assert_array_almost_equal(ind_X, ind_D)

        # Check X=None in prediction
        dist_X, ind_X = getattr(nbrs_X, method)(None)
        dist_D, ind_D = getattr(nbrs_D, method)(None)
        assert_array_almost_equal(dist_X, dist_D)
        assert_array_almost_equal(ind_X, ind_D)

        # Must raise a ValueError if the matrix is not of correct shape
        assert_raises(ValueError, getattr(nbrs_D, method), X)

    target = np.arange(X.shape[0])
    for Est in (neighbors.KNeighborsClassifier,
                neighbors.RadiusNeighborsClassifier,
                neighbors.KNeighborsRegressor,
                neighbors.RadiusNeighborsRegressor):
        print(Est)
        est = Est(metric='euclidean')
        est.radius = est.n_neighbors = 1
        pred_X = est.fit(X, target).predict(Y)
        est.metric = 'precomputed'
        pred_D = est.fit(DXX, target).predict(DYX)
        assert_array_almost_equal(pred_X, pred_D)
Project: Parallel-SGD    Author: angadgill    | Project source | File source
def test_neighbors_badargs():
    # Test bad argument values: these should all raise ValueErrors
    assert_raises(ValueError,
                  neighbors.NearestNeighbors,
                  algorithm='blah')

    X = rng.random_sample((10, 2))
    Xsparse = csr_matrix(X)
    y = np.ones(10)

    for cls in (neighbors.KNeighborsClassifier,
                neighbors.RadiusNeighborsClassifier,
                neighbors.KNeighborsRegressor,
                neighbors.RadiusNeighborsRegressor):
        assert_raises(ValueError,
                      cls,
                      weights='blah')
        assert_raises(ValueError,
                      cls, p=-1)
        assert_raises(ValueError,
                      cls, algorithm='blah')
        nbrs = cls(algorithm='ball_tree', metric='haversine')
        assert_raises(ValueError,
                      nbrs.predict,
                      X)
        assert_raises(ValueError,
                      ignore_warnings(nbrs.fit),
                      Xsparse, y)
        nbrs = cls()
        assert_raises(ValueError,
                      nbrs.fit,
                      np.ones((0, 2)), np.ones(0))
        assert_raises(ValueError,
                      nbrs.fit,
                      X[:, :, None], y)
        nbrs.fit(X, y)
        assert_raises(ValueError,
                      nbrs.predict,
                      [[]])
        if (issubclass(cls, neighbors.KNeighborsClassifier) or
                issubclass(cls, neighbors.KNeighborsRegressor)):
            nbrs = cls(n_neighbors=-1)
            assert_raises(ValueError, nbrs.fit, X, y)

    nbrs = neighbors.NearestNeighbors().fit(X)

    assert_raises(ValueError, nbrs.kneighbors_graph, X, mode='blah')
    assert_raises(ValueError, nbrs.radius_neighbors_graph, X, mode='blah')
Project: GZ_travelTime    Author: zhilonglu    | Project source | File source
def get_model_list(task_name):

    model_list, name_list = [], []

    model_list.append(linear_model.LinearRegression())
    name_list.append('LR')
    #
    model_list.append(linear_model.SGDRegressor())
    name_list.append('LR_SGD')

    model_list.append(linear_model.Lasso(alpha = 1.0))
    name_list.append('Lasso')

    model_list.append(linear_model.Ridge(alpha = 1.0))
    name_list.append('Ridge')

    model_list.append(linear_model.LassoLars(alpha=.1))
    name_list.append('LassoLars')

    model_list.append(linear_model.BayesianRidge())
    name_list.append('BayesianRidge')

    model_list.append(KernelRidge(alpha=1.0))
    name_list.append('KernelRidge')

    model_list.append(gaussian_process.GaussianProcess(theta0=1e-2, thetaL=1e-4, thetaU=1e-1))
    name_list.append('GaussianProcess')

    model_list.append(KNeighborsRegressor(weights = 'uniform',n_neighbors=3))
    name_list.append('KNN_unif')

    model_list.append(KNeighborsRegressor(weights = 'distance',n_neighbors=3))
    name_list.append('KNN_dist')

    model_list.append(SVR(kernel = 'linear', C = 1, gamma = 'auto', coef0 = 0, degree = 2))
    name_list.append('SVM_linear')

    model_list.append(SVR(kernel = 'poly', C = 1, gamma = 'auto', coef0 = 0, degree = 2))
    name_list.append('SVM_poly')

    model_list.append(SVR(kernel = 'rbf', C = 1, gamma = 'auto', coef0 = 0, degree = 2))
    name_list.append('SVM_rbf')

    model_list.append(DecisionTreeRegressor())
    name_list.append('DT')

    model_list.append(RandomForestRegressor(n_estimators=100, max_depth=None,min_samples_split=2, random_state=0))
    name_list.append('RF')

    model_list.append(ExtraTreesRegressor(n_estimators=100, max_depth=None, max_features='auto', min_samples_split=2, random_state=0))
    name_list.append('ET')

    return model_list, name_list