Python sklearn.linear_model module: ElasticNet() example source code

The following 32 code examples, extracted from open-source Python projects, illustrate how to use sklearn.linear_model.ElasticNet().
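Before the project-specific examples, here is a minimal, self-contained sketch of the basic ElasticNet fit/predict workflow; the synthetic data and parameter values are illustrative assumptions, not taken from any of the projects below.

import numpy as np
from sklearn.linear_model import ElasticNet
from sklearn.model_selection import train_test_split

# Illustrative synthetic regression data (assumed).
rng = np.random.RandomState(0)
X = rng.randn(200, 10)
coef = np.zeros(10)
coef[:3] = [1.5, -2.0, 3.0]              # only the first three features matter
y = X @ coef + 0.1 * rng.randn(200)

X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)

# alpha scales the overall penalty; l1_ratio mixes the L1 and L2 terms.
enet = ElasticNet(alpha=0.1, l1_ratio=0.5)
enet.fit(X_train, y_train)

print("R^2 on held-out data:", enet.score(X_test, y_test))
print("non-zero coefficients:", int(np.sum(enet.coef_ != 0)))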

Project: mlens    Author: flennerhag
def build_ensemble(**kwargs):
    """Generate ensemble."""

    ens = SuperLearner(**kwargs)
    prep = {'Standard Scaling': [StandardScaler()],
            'Min Max Scaling': [MinMaxScaler()],
            'No Preprocessing': []}

    est = {'Standard Scaling':
               [ElasticNet(), Lasso(), KNeighborsRegressor()],
           'Min Max Scaling':
               [SVR()],
           'No Preprocessing':
               [RandomForestRegressor(random_state=SEED),
                GradientBoostingRegressor()]}

    ens.add(est, prep)

    ens.add(GradientBoostingRegressor(), meta=True)

    return ens
Project: strategy    Author: kanghua309
def model_cross_valid(X,Y):
    seed = 7
    # random_state only takes effect when shuffle=True
    kfold = model_selection.KFold(n_splits=10, shuffle=True, random_state=seed)
    def build_model(model_name):
        model = model_name()
        return model
    scoring = 'neg_mean_squared_error'
    # + random forest, boost, lstm, gbdt

    for model_name in [LinearRegression,ElasticNet]:
    #for model_name in [LinearRegression,Ridge,Lasso,ElasticNet,KNeighborsRegressor,DecisionTreeRegressor,SVR,RandomForestRegressor,AdaBoostRegressor,GradientBoostingRegressor]:
        model = build_model(model_name)
        results = model_selection.cross_val_score(model, X, Y, cv=kfold, scoring=scoring)
        print(model_name, results.mean())
Project: PonyGE2    Author: PonyGE
def fit_enet(train_X, train_y, test_X):
    """
    Use linear regression to predict. Elastic net is LR with L1 and L2
    regularisation.

    :param train_X: training features
    :param train_y: training targets
    :param test_X: test features
    :return: model description string, train predictions, test predictions
    """
    enet = ElasticNet()
    enet.fit(train_X, train_y)
    model = "ElasticNet int %.2f coefs %s" % (enet.intercept_, pprint(enet.coef_))
    yhat_train = enet.predict(train_X)
    yhat_test = enet.predict(test_X)

    return model, yhat_train, yhat_test
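As the docstring above notes, the elastic net combines L1 and L2 regularisation; one hedged way to see the mix is that l1_ratio=1.0 makes ElasticNet coincide with Lasso (while small l1_ratio values push it toward ridge-like behaviour). A quick check on assumed synthetic data:

import numpy as np
from sklearn.linear_model import ElasticNet, Lasso

X = np.random.RandomState(1).randn(100, 5)
y = X[:, 0] - 2 * X[:, 1] + 0.05 * np.random.RandomState(2).randn(100)

# With l1_ratio=1.0 the elastic net penalty is pure L1, so it matches Lasso.
enet_l1 = ElasticNet(alpha=0.1, l1_ratio=1.0).fit(X, y)
lasso = Lasso(alpha=0.1).fit(X, y)
print(np.allclose(enet_l1.coef_, lasso.coef_))  # expected: True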
Project: EarlyWarning    Author: wjlei1990
def train_EN_model(_train_x, train_y, _predict_x):
    print_title("ElasticNet")
    train_x, predict_x = \
        standarize_feature(_train_x, _predict_x)

    #l1_ratios = [1e-4, 1e-3, 1e-2, 1e-1]
    #l1_ratios = [1e-5, 1e-4, 1e-3]
    l1_ratios = [0.9, 0.92, 0.95, 0.97, 0.99]
    #l1_ratios = [0.5]
    min_mse = float("inf")  # so the first l1_ratio always sets best_l1_ratio / best_alpha
    for r in l1_ratios:
        t1 = time.time()
        reg_en = linear_model.ElasticNetCV(
            l1_ratio=r, cv=5, n_jobs=4, verbose=1, precompute=True)
        reg_en.fit(train_x, train_y)
        n_nonzeros = (reg_en.coef_ != 0).sum()
        _mse = np.mean(reg_en.mse_path_, axis=1)[
            np.where(reg_en.alphas_ == reg_en.alpha_)[0][0]]
        if _mse < min_mse:
            min_mse = _mse
            best_l1_ratio = r
            best_alpha = reg_en.alpha_
        t2 = time.time()
        print("ratio(%e) -- n: %d -- alpha: %f -- mse: %f -- "
              "time: %.2f sec" %
              (r, n_nonzeros, reg_en.alpha_, _mse, t2 - t1))

    print("Best l1_ratio and alpha: %f, %f" % (best_l1_ratio, best_alpha))
    # predict_model
    reg = linear_model.ElasticNet(l1_ratio=best_l1_ratio, alpha=best_alpha)
    reg.fit(train_x, train_y)
    predict_y = reg.predict(predict_x)
    train_y_pred = reg.predict(train_x)
    return {"y": predict_y, "train_y": train_y_pred, "coef": reg.coef_}
Project: PySAT_Point_Spectra_GUI    Author: USGS-Astrogeology
def connectWidgets(self):
        en = ElasticNet()

        self.minalpha_spin.setValue(0.0000001)
        self.maxalpha_spin.setValue(0.01)
        self.nalpha_spin.setValue(100)

        self.enl1_ratioLineEdit.setText('0.1, 0.5, 0.7, 0.9, 0.95, 0.99, 1.0')
        self.enfit_intercept_list.setCurrentItem(self.enfit_intercept_list.findItems(str(en.fit_intercept),QtCore.Qt.MatchExactly)[0])
        self.ennormalize_list.setCurrentItem(self.ennormalize_list.findItems(str(en.normalize),QtCore.Qt.MatchExactly)[0])
        #self.enprecomputeCheckBox.setChecked(en.precompute)
        self.enmax_iterLineEdit.setText(str(en.max_iter))
        #self.encopy_XCheckBox.setChecked(en.copy_X)
        self.entolLineEdit.setText(str(en.tol))
        self.enwarm_start_list.setCurrentItem(self.enwarm_start_list.findItems(str(en.warm_start),QtCore.Qt.MatchExactly)[0])
        self.enpositive_list.setCurrentItem(self.enpositive_list.findItems(str(en.positive),QtCore.Qt.MatchExactly)[0])
        #self.setComboBox(self.enselectionComboBox, ['cyclic', 'random'])
        #self.defaultComboItem(self.enselectionComboBox, en.selection)
Project: sanergy-public    Author: dssg
def define_model(self):
        #if self.modeltype == "AR" :
        #    return statsmodels.tsa.ar_model.AR(max_order=self.parameters['max_order'])
        if self.modeltype == "RandomForest" :
            return ensemble.RandomForestRegressor(n_estimators=self.parameters['n_estimators'])
            #return ensemble.RandomForestClassifier(
            #    n_estimators=self.parameters['n_estimators'])
        elif self.modeltype == "LinearRegression" :
            return linear_model.LinearRegression()
        elif self.modeltype == "Lasso" :
            return linear_model.Lasso(
            alpha=self.parameters['alpha'])
        elif self.modeltype == "ElasticNet" :
            return linear_model.ElasticNet(
            alpha=self.parameters['alpha'],
            l1_ratio=self.parameters['l1_ratio'])
        elif self.modeltype == "SVR" :
            return SVR(
            C=self.parameters['C'],
            epsilon=self.parameters['epsilon'],
            kernel=self.parameters['kernel'])
        #elif self.modeltype == 'StaticModel':
        #   return StaticModel (
        #      parameters=self.parameters
        #     )
        #elif self.modeltype == 'AdvancedStaticModel':
        #   return AdvancedStaticModel (
        #       parameters=self.parameters
        #        )

        # elif self.modeltype == 'SGDRegressor' :
        #     print(self.parameters)
        #     return linear_model.SGDRegressor(
        #     loss=self.parameters['loss'],
        #     penalty=self.parameters['penalty'],
        #     l1_ratio=self.parameters['l1_ratio'])
        else:
            raise ConfigError("Unsupported model {0}".format(self.modeltype))
Project: FreeDiscovery    Author: FreeDiscovery
def test_n_clusters():
    # Test that n_clusters param works properly
    X, y = make_blobs(n_samples=100, centers=10)
    brc1 = Birch(n_clusters=10)
    brc1.fit(X)
    assert_greater(len(brc1.subcluster_centers_), 10)
    assert_equal(len(np.unique(brc1.labels_)), 10)

    # Test that n_clusters = Agglomerative Clustering gives
    # the same results.
    gc = AgglomerativeClustering(n_clusters=10)
    brc2 = Birch(n_clusters=gc)
    brc2.fit(X)
    assert_array_equal(brc1.subcluster_labels_, brc2.subcluster_labels_)
    assert_array_equal(brc1.labels_, brc2.labels_)

    # Test that the wrong global clustering step raises an Error.
    clf = ElasticNet()
    brc3 = Birch(n_clusters=clf)
    assert_raises(ValueError, brc3.fit, X)

    # Test that a small number of clusters raises a warning.
    brc4 = Birch(threshold=10000.)
    assert_warns(UserWarning, brc4.fit, X)
Project: strategy    Author: kanghua309
def model_fit_and_test(TrainX,TrainY,TestX,TestY):
    def build_model(model_name):
        model = model_name()
        return model
    #for model_name in [LinearRegression, Ridge, Lasso, ElasticNet, KNeighborsRegressor, DecisionTreeRegressor, SVR,RandomForestRegressor, AdaBoostRegressor, GradientBoostingRegressor]:
    for model_name in [LinearRegression, ElasticNet]:
        model = build_model(model_name)
        model.fit(TrainX,TrainY)
        print(model_name)
        resid = model.predict(TestX) - TestY
        #print resid
        print("Residual sum of squares: %f"% np.mean(resid ** 2))
        #print model.predict(TestX)
        #print TestY
        # Explained variance score: 1 is perfect prediction
        plt.scatter(model.predict(TestX), resid);
        plt.axhline(0, color='red')
        plt.xlabel('Predicted Values')
        plt.ylabel('Residuals')
        #plt.xlim([1, 50])
        plt.show()

        print('Variance score: %.2f' % model.score(TestX, TestY))

        from statsmodels.stats.stattools import jarque_bera
        _, pvalue, _, _ = jarque_bera(resid)
        print ("Test Residuals Normal", pvalue)

        from statsmodels import regression, stats
        import statsmodels.api as sms
        import statsmodels.stats.diagnostic as smd
        # xs_with_constant = sms.add_constant(np.column_stack((X1,X2,X3,X4)))
        xs_with_constant = sms.add_constant(TestX)
        _, pvalue1, _, _ = stats.diagnostic.het_breuschpagan(resid, xs_with_constant)  # current statsmodels spelling
        print ("Test Heteroskedasticity", pvalue1)
        ljung_box = smd.acorr_ljungbox(resid, lags=10)

        #print "Lagrange Multiplier Statistics:", ljung_box[0]
        print "Test Autocorrelation P-values:", ljung_box[1]
        if any(ljung_box[1] < 0.05):
            print "The residuals are autocorrelated."
        else:
            print "The residuals are not autocorrelated."
Project: mlens    Author: flennerhag
def build_ensemble(**kwargs):
    """Generate ensemble."""

    ens = SuperLearner(**kwargs)

    est = [ElasticNet(copy_X=False),
           Lasso(copy_X=False)]

    ens.add(est)
    ens.add(KNeighborsRegressor())

    return ens
Project: scattertext    Author: JasonKessler
def get_regression_coefs(self, category, clf=ElasticNet()):
        ''' Computes regression score of tdfidf transformed features
        Parameters
        ----------
        category : str
            category name to score
        clf : sklearn regressor

        Returns
        -------
        coefficient array
        '''
        self._fit_tfidf_model(category, clf)
        return clf.coef_
Project: eezzy    Author: 3Blades
def spot_check(X, y, problem_type='regression'):
    # 'problem_type' is assumed here; the original compared the built-in 'type',
    # which can never equal 'regression', leaving 'models' undefined below.
    models = []
    if problem_type == 'regression':
        models = [
        (LinearRegression(), 'Ordinary Least Squares'),
        (Ridge(alpha=0.1), 'Ridge (alpha 0.1)'),
        (Ridge(), 'Ridge (alpha 1.0)'),
        (Lasso(alpha=0.1), 'Lasso (alpha 0.1)'),
        (Lasso(), 'Lasso (alpha 1.0)'),
        (ElasticNet(alpha=0.1), 'ElasticNet (alpha 0.1)'),
        (ElasticNet(), 'ElasticNet (alpha 1.0)'),
        (DecisionTreeRegressor(), 'Decision Tree'),
        (KNeighborsRegressor(), 'K-Nearest Neighbors'),

#         (RandomForestRegressor(), 'Random Forest Regressor'),
#         (BaggingRegressor(), 'Bagging Regressor'),
#         (GradientBoostingRegressor(), 'Gradient Boosted Regression'),
#         (SVR(), 'Support Vector Regression')
    ]

    splits = 5
    scores = []

    for model, model_name in models:
        score = check_model(model, splits, X, y)
        # get average score
        scores.append(score)

    model_names = map(lambda x: x[1], models)
    for name, score in zip(model_names, scores):
        print('%s: %f' % (name, score))
Project: yellowbrick    Author: DistrictDataLabs
def test_regressor_cv(self):
        """
        Ensure only "CV" regressors are allowed
        """

        for model in (SVR, Ridge, Lasso, LassoLars, ElasticNet):
            with self.assertRaises(YellowbrickTypeError):
                alphas = AlphaSelection(model())

        for model in (RidgeCV, LassoCV, LassoLarsCV, ElasticNetCV):
            try:
                alphas = AlphaSelection(model())
            except YellowbrickTypeError:
                self.fail("could not instantiate RegressorCV on alpha selection")
Project: PySAT_Point_Spectra_GUI    Author: USGS-Astrogeology
def connectWidgets(self):
        self.elasticNetCVGroupBox.setHidden(True)
        en = ElasticNet()
        encv = ElasticNetCV()

        self.alpha_text.setText(str(en.alpha))
        self.enl1_ratioDoubleSpinBox.setValue(en.l1_ratio)
        self.enfit_interceptCheckBox.setChecked(en.fit_intercept)
        self.ennormalizeCheckBox.setChecked(en.normalize)
        self.enprecomputeCheckBox.setChecked(en.precompute)
        self.enmax_iterSpinBox.setValue(en.max_iter)
        self.encopy_XCheckBox.setChecked(en.copy_X)
        self.entolDoubleSpinBox.setValue(en.tol)
        self.enwarm_startCheckBox.setChecked(en.warm_start)
        self.enpositiveCheckBox.setChecked(en.positive)
        self.setComboBox(self.enselectionComboBox, ['cyclic', 'random'])
        self.defaultComboItem(self.enselectionComboBox, en.selection)

        self.l1_ratioDoubleSpinBox.setValue(encv.l1_ratio)
        self.epsDoubleSpinBox.setValue(encv.eps)
        self.n_alphasSpinBox.setValue(encv.n_alphas)
        self.alphasLineEdit.setText('None')
        self.fit_interceptCheckBox.setChecked(encv.fit_intercept)
        self.normalizeCheckBox.setChecked(encv.normalize)
        self.setComboBox(self.precomputeComboBox, ['True', 'False', 'auto', 'array-like'])
        self.defaultComboItem(self.precomputeComboBox, encv.precompute)
        self.max_iterSpinBox.setValue(encv.max_iter)
        self.tolDoubleSpinBox.setValue(encv.tol)
        self.cVSpinBox.setValue(3)
        self.copy_XCheckBox.setChecked(encv.copy_X)
        self.verboseCheckBox.setChecked(encv.verbose)
        self.n_jobsSpinBox.setValue(encv.n_jobs)
        self.positiveCheckBox.setChecked(encv.positive)
        self.setComboBox(self.selectionComboBox, ['cyclic', 'random'])
        self.defaultComboItem(self.selectionComboBox, encv.selection)
Project: sportsball    Author: jgershen
def build_model(train_file, attr_file, model_out, algorithm='ridge'):
  classifiers = ['ridge', 'linear', 'lasso', 'rf', 'en']
  if algorithm not in classifiers:
    raise NotImplementedError("only implemented algorithms: " + str(classifiers))

  train_data = pd.read_pickle(train_file)

  attrs = read_attrs(attr_file)
  target_attr = attrs[0]
  usable_attrs = attrs[1:]

  if algorithm == 'ridge':
    clf = Ridge()
  elif algorithm == 'linear':
    clf = LinearRegression()
  elif algorithm == 'lasso':
    clf = Lasso()
  elif algorithm == 'en':
    clf = ElasticNet()
  else:
    clf = RandomForestRegressor()

  logger.debug("Modeling '%s'", target_attr)
  logger.debug("    train set (%d): %s", len(train_data), train_file)
  logger.debug("  Algorithm: %s", algorithm)
  if hasattr(clf, 'coef_'):
    logger.debug('Coefficients:')
    for i,c in enumerate(clf.coef_):
      logger.debug('    %-20s' % usable_attrs[i] + ':', '%20.4f' % c)
  clf.fit(train_data[usable_attrs], train_data[target_attr])

  pickle.dump(clf, open(model_out, 'wb'))
Project: jamespy_py3    Author: jskDr
def gs_ElasticNet( xM, yV, en_params):

    print(xM.shape, yV.shape)

    clf = linear_model.ElasticNet()
    kf5_c = model_selection.KFold( n_splits = 5, shuffle=True)
    kf5 = kf5_c.split( xM)
    gs = model_selection.GridSearchCV( clf, en_params, scoring = 'r2', cv = kf5_c, n_jobs = -1)

    gs.fit( xM, yV)

    return gs
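The gs_ElasticNet variants in this project never show what en_params contains; a plausible grid (keys must be ElasticNet constructor arguments, the values here are assumptions for illustration) would be:

import numpy as np

# Hypothetical parameter grid for gs_ElasticNet; adjust ranges to the data.
en_params = {
    'alpha': np.logspace(-3, 1, 5),
    'l1_ratio': [0.2, 0.5, 0.8],
}

# Synthetic stand-ins for xM / yV, just to make the call concrete.
xM = np.random.RandomState(0).randn(100, 8)
yV = xM[:, 0] - xM[:, 1] + 0.1 * np.random.RandomState(1).randn(100)

gs = gs_ElasticNet(xM, yV, en_params)
print(gs.best_params_, gs.best_score_)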
Project: jamespy_py3    Author: jskDr
def gs_ElasticNet( xM, yV, en_params):

    print(xM.shape, yV.shape)

    clf = linear_model.ElasticNet()
    kf5_c = model_selection.KFold( n_splits=5, shuffle=True)
    kf5 = kf5_c.split( xM) 
    gs = model_selection.GridSearchCV( clf, en_params, scoring = 'r2', cv = kf5, n_jobs = -1)

    gs.fit( xM, yV)

    return gs
Project: jamespy_py3    Author: jskDr
def gs_ElasticNet( xM, yV, en_params):

    print(xM.shape, yV.shape)

    clf = linear_model.ElasticNet()
    kf5_c = model_selection.KFold( n_splits = 5, shuffle=True)
    kf5 = kf5_c.split( xM)
    gs = model_selection.GridSearchCV( clf, en_params, scoring = 'r2', cv = kf5_c, n_jobs = -1)

    gs.fit( xM, yV)

    return gs
Project: jamespy_py3    Author: jskDr
def gen_WR_pilot_ch(self, pilot_SNRdB, alpha_l1r = 0, model = "Ridge"):

        """
        The reception process with pilot channel estimation
        is conducted.
        """
        Npilot = self.Npilot

        SNRpilot = db2var( pilot_SNRdB)

        BPSK, s_a, x_flat_a, x_a = gen_BPSK( Npilot, self.Nt)
        # H_a = gen_H( self.Nr, self.Nt)
        # H_a = self.H_a
        y_a = gen_Rx( self.Nr, Npilot, SNRpilot, self.H_a, x_a)

        yT_a = y_a.T

        # print( x_a.shape, yT_a.shape)

        # Now you can use either Ridge or Lasso methods. 
        #lm = linear_model.Ridge( alpha)
        if model == "ElasticNet":
            lm = linear_model.ElasticNet( alpha_l1r[0], alpha_l1r[1])
        else:
            lm = getattr( linear_model, model)(alpha_l1r)

        lm.fit( yT_a, x_a)
        self.W_a = lm.coef_

        # print( "np.dot( W_a, H_a) =", np.dot( self.W_a, self.H_a))

        self.gen_Decoding()
Project: jamespy_py3    Author: jskDr
def gen_WR_pilot_only(self, alpha_l1r = 0):
        """
        yT_a and x_a was prepared already. 
        Now, W_a is calculated using alpha and then, 
        decode data. 
        For linear regression, alpha_l1r should not be specified except 0.
        """

        yT_a = self.rx_p["yT_a"]
        x_a = self.rx_p["x_a"]

        # for alpha == 0, model is changed to linear regression.  
        if alpha_l1r == 0:
            model = "LinearRegression"
        else:
            model = self.model

        if model == "LinearRegression":
            lm = linear_model.LinearRegression()
        elif model == "ElasticNet":
            lm = linear_model.ElasticNet( alpha_l1r[0], alpha_l1r[1])
        else: # This is either Ridge or Lasso
            lm = getattr( linear_model, model)(alpha_l1r)

        lm.fit( yT_a, x_a)
        self.W_a = lm.coef_

        # print( "np.dot( W_a, H_a) =", np.dot( self.W_a, self.H_a))

        self.gen_Decoding()
Project: jamespy_py3    Author: jskDr
def gs_ElasticNet(xM, yV, en_params):

    print(xM.shape, yV.shape)

    clf = linear_model.ElasticNet()
    kf5_c = model_selection.KFold(n_splits=5, shuffle=True)
    kf5 = kf5_c.split(xM)
    gs = model_selection.GridSearchCV(
        clf, en_params, scoring='r2', cv=kf5, n_jobs=-1)

    gs.fit(xM, yV)

    return gs
Project: jamespy_py3    Author: jskDr
def gs_ElasticNet( xM, yV, en_params):

    print(xM.shape, yV.shape)

    clf = linear_model.ElasticNet()
    kf5 = cross_validation.KFold( xM.shape[0], n_folds=5, shuffle=True)
    gs = grid_search.GridSearchCV( clf, en_params, scoring = 'r2', cv = kf5, n_jobs = -1)

    gs.fit( xM, yV)

    return gs
Project: jamespy_py3    Author: jskDr
def gs_ElasticNet( xM, yV, en_params):

    print(xM.shape, yV.shape)

    clf = linear_model.ElasticNet()
    kf5 = cross_validation.KFold( xM.shape[0], n_folds=5, shuffle=True)
    gs = grid_search.GridSearchCV( clf, en_params, scoring = 'r2', cv = kf5, n_jobs = -1)

    gs.fit( xM, yV)

    return gs
Project: sia-cog    Author: deepakkumar1984
def getModels():
    result = []
    result.append("LinearRegression")
    result.append("BayesianRidge")
    result.append("ARDRegression")
    result.append("ElasticNet")
    result.append("HuberRegressor")
    result.append("Lasso")
    result.append("LassoLars")
    result.append("Rigid")
    result.append("SGDRegressor")
    result.append("SVR")
    result.append("MLPClassifier")
    result.append("KNeighborsClassifier")
    result.append("SVC")
    result.append("GaussianProcessClassifier")
    result.append("DecisionTreeClassifier")
    result.append("RandomForestClassifier")
    result.append("AdaBoostClassifier")
    result.append("GaussianNB")
    result.append("LogisticRegression")
    result.append("QuadraticDiscriminantAnalysis")
    return result
Project: ML-note    Author: JasonK93
def test_ElasticNet(*data):
    '''
    test for Elastic Net
    :param data: train_data, test_data, train_value, test_value
    :return: None
    '''
    X_train,X_test,y_train,y_test=data
    regr = linear_model.ElasticNet()
    regr.fit(X_train, y_train)
    print('Coefficients:{0}, intercept {1}'.format(regr.coef_, regr.intercept_))
    print("Residual sum of squares: {0}".format(np.mean((regr.predict(X_test) - y_test) ** 2)))
    print('Score: {0}'.format(regr.score(X_test, y_test)))
Project: ML-note    Author: JasonK93
def test_ElasticNet_alpha_rho(*data):
    '''
    test score with different alpha and l1_ratio
    :param data: train_data, test_data, train_value, test_value
    :return: None
    '''
    X_train,X_test,y_train,y_test=data
    alphas=np.logspace(-2,2)
    rhos=np.linspace(0.01,1)
    scores=[]
    for alpha in alphas:
        for rho in rhos:
            regr = linear_model.ElasticNet(alpha=alpha, l1_ratio=rho)
            regr.fit(X_train, y_train)
            scores.append(regr.score(X_test, y_test))
    ## graph
    # scores was filled alpha-major; reshape and transpose so rows vary with rho,
    # matching the meshgrid orientation used for plotting.
    scores = np.array(scores).reshape(len(alphas), len(rhos)).T
    alphas, rhos = np.meshgrid(alphas, rhos)
    from mpl_toolkits.mplot3d import Axes3D # this part works well in py3
    from matplotlib import cm
    fig=plt.figure()
    ax=Axes3D(fig)
    surf = ax.plot_surface(alphas, rhos, scores, rstride=1, cstride=1, cmap=cm.jet,
        linewidth=0, antialiased=False)
    fig.colorbar(surf, shrink=0.5, aspect=5)
    ax.set_xlabel(r"$\alpha$")
    ax.set_ylabel(r"$\rho$")
    ax.set_zlabel("score")
    ax.set_title("ElasticNet")
    plt.show()
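Both ML-note helpers expect data in the order (train_data, test_data, train_value, test_value); a hedged driver, assuming any standard regression dataset and that numpy, matplotlib and linear_model are imported alongside the functions:

from sklearn import datasets, model_selection

# diabetes is used purely as an illustrative regression dataset (assumed choice).
diabetes = datasets.load_diabetes()
X_train, X_test, y_train, y_test = model_selection.train_test_split(
    diabetes.data, diabetes.target, test_size=0.25, random_state=0)

test_ElasticNet(X_train, X_test, y_train, y_test)
test_ElasticNet_alpha_rho(X_train, X_test, y_train, y_test)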
Project: Parallel-SGD    Author: angadgill
def test_elasticnet_convergence(self):
        # Check that the SGD output is consistent with coordinate descent

        n_samples, n_features = 1000, 5
        rng = np.random.RandomState(0)
        X = np.random.randn(n_samples, n_features)
        # ground_truth linear model that generate y from X and to which the
        # models should converge if the regularizer would be set to 0.0
        ground_truth_coef = rng.randn(n_features)
        y = np.dot(X, ground_truth_coef)

        # XXX: alpha = 0.1 seems to cause convergence problems
        for alpha in [0.01, 0.001]:
            for l1_ratio in [0.5, 0.8, 1.0]:
                cd = linear_model.ElasticNet(alpha=alpha, l1_ratio=l1_ratio,
                                             fit_intercept=False)
                cd.fit(X, y)
                sgd = self.factory(penalty='elasticnet', n_iter=50,
                                   alpha=alpha, l1_ratio=l1_ratio,
                                   fit_intercept=False)
                sgd.fit(X, y)
                err_msg = ("cd and sgd did not converge to comparable "
                           "results for alpha=%f and l1_ratio=%f"
                           % (alpha, l1_ratio))
                assert_almost_equal(cd.coef_, sgd.coef_, decimal=2,
                                    err_msg=err_msg)
Project: Parallel-SGD    Author: angadgill
def test_n_clusters():
    # Test that n_clusters param works properly
    X, y = make_blobs(n_samples=100, centers=10)
    brc1 = Birch(n_clusters=10)
    brc1.fit(X)
    assert_greater(len(brc1.subcluster_centers_), 10)
    assert_equal(len(np.unique(brc1.labels_)), 10)

    # Test that n_clusters = Agglomerative Clustering gives
    # the same results.
    gc = AgglomerativeClustering(n_clusters=10)
    brc2 = Birch(n_clusters=gc)
    brc2.fit(X)
    assert_array_equal(brc1.subcluster_labels_, brc2.subcluster_labels_)
    assert_array_equal(brc1.labels_, brc2.labels_)

    # Test that the wrong global clustering step raises an Error.
    clf = ElasticNet()
    brc3 = Birch(n_clusters=clf)
    assert_raises(ValueError, brc3.fit, X)

    # Test that a small number of clusters raises a warning.
    brc4 = Birch(threshold=10000.)
    assert_warns(UserWarning, brc4.fit, X)
Project: kaggle    Author: RankingAI
def submit(self):
      """"""
      ## retrain with the whole training data
      self.TrainData = self.TrainData[(self.TrainData['logerror'] > self._low) & (self.TrainData['logerror'] < self._up)]

      self.TrainData['longitude'] -= -118600000
      self.TrainData['latitude'] -= 34220000

      X = self.TrainData.drop(self._l_drop_cols, axis=1)
      Y = self.TrainData['logerror']
      X = X.values.astype(np.float32, copy=False)

      en = ElasticNet(alpha= self._alpha, l1_ratio = self._ratio, max_iter= self._iter, tol= 1e-4, selection= self._sel, random_state= 2017)
      self._model = en.fit(X, Y)

      del self.TrainData, X, Y
      gc.collect()

      self.TestData = self._data.LoadFromHdfFile(self.InputDir, 'test')
      #self.TestData = self.TestData.sample(frac = 0.01)

      self._sub = pd.DataFrame(index=self.TestData.index)
      self._sub['ParcelId'] = self.TestData['parcelid']

      self.TestData['longitude'] -= -118600000
      self.TestData['latitude'] -= 34220000
      N = 200000
      start = time.time()
      for d in self._l_test_predict_columns:
         s0 = time.time()

         print('Prediction for column %s ' % d)
         l_test_columns = ['%s%s' % (c, d) if (c in ['lastgap', 'monthyear', 'buildingage']) else c for c in
                           self._l_train_columns]
         x_test = self.TestData[l_test_columns]

         for idx in range(0, len(x_test), N):
            x_test_block = x_test[idx:idx + N].values.astype(np.float32, copy=False)
            ret = self._model.predict(x_test_block)# * 0.99 + 0.011 * 0.01
            self._sub.loc[x_test[idx:idx + N].index, d] = ret
            print(np.mean(np.abs(ret)))

         e0 = time.time()
         print('Prediction for column %s is done. time elapsed %ds' % (d, (e0 - s0)))

      ## clean
      del self.TestData
      gc.collect()

      end = time.time()
      print('Prediction is done. time elapsed %ds' % (end - start))

      if (os.path.exists(self.OutputDir) == False):
         os.makedirs(self.OutputDir)

      self._sub.to_csv(
         '{0}/{1}_{2}.csv'.format(self.OutputDir, self.__class__.__name__, datetime.now().strftime('%Y%m%d-%H:%M:%S')),
         index=False, float_format='%.4f')
Project: karura    Author: icoxfog417
def build(self, dataset):
        evaluators = []
        cv = 5  # todo: have to adjust to dataset size

        if self.field_manager.target.is_categorizable():
            parameter_candidates = [
                {"kernel": ["linear"], "C": [1, 10, 100]},
                {"kernel": ["rbf"], "gamma": [1e-1, 1e-2, 1e-3, 1e-4], "C": [1, 10, 100]}
            ]
            # todo: have to think about scoring parameter (default is accuracy, so f1 related score may be appropriate)
            evaluator = GridSearchCV(
                SVC(C=1),
                parameter_candidates,
                cv=cv
            )
            evaluators.append(evaluator)
        else:

            evaluator1 = GridSearchCV(
                linear_model.ElasticNet(),
                {"alpha": [0.1, 0.5, 0.7, 1], "l1_ratio": [(r + 1) / 10 for r in range(10)]},
                cv=cv
            )

            parameter_candidates = [
                {"kernel": ["rbf"], "gamma": [1e-3, 1e-4], "C": [1, 10, 100]}
            ]

            # todo: have to think about scoring parameter (default is accuracy, so f1 related score may be appropriate)
            evaluator2 = GridSearchCV(
                SVR(C=1),
                parameter_candidates,
                cv=cv
            )
            evaluators.append(evaluator1)
            evaluators.append(evaluator2)

        self.model_score = 0
        self.model = None
        for e in evaluators:
            e.fit(dataset.data, dataset.target)
            if e.best_score_ > self.model_score:
                self.model_score = e.best_score_
                self.model = e.best_estimator_
Project: PySAT_Point_Spectra_GUI    Author: USGS-Astrogeology
def function(self):
        p_attrib = {'False': False, 'True': True, 'Array-like': 'array-like'}
        r_attrib = {'None': None}
        # TODO Add back the random state later.
        # try:
        #     r_state = int(self.randomStateLineEdit.text())
        # except:
        #     r_state = r_attrib[self.randomStateLineEdit.text()]

        index = self.precomputeComboBox.currentIndex()
        precomputeComboBox = self.precomputeComboBox.itemText(index)

        if self.CVCheckBox.isChecked():
            params = {
                'l1_ratio': self.l1_ratioDoubleSpinBox.value(),
                'eps': self.epsDoubleSpinBox.value(),
                'n_alphas': self.n_alphasSpinBox.value(),
                'alphas': {'None': None}.get(self.alphasLineEdit.text()),
                'fit_intercept': self.fit_interceptCheckBox.isChecked(),
                'normalize': self.normalizeCheckBox.isChecked(),
                'precompute': self.precomputeComboBox.currentText(),
                'max_iter': self.max_iterSpinBox.value(),
                'tol': self.tolDoubleSpinBox.value(),
                'cv': self.cVSpinBox.value(),
                'copy_X': self.copy_XCheckBox.isChecked(),
                'verbose': self.verboseCheckBox.isChecked(),
                'n_jobs': self.n_jobsSpinBox.value(),
                'positive': self.positiveCheckBox.isChecked(),
                'selection': self.selectionComboBox.currentText(),
                'CV': self.CVCheckBox.isChecked()}
            return params, self.getChangedValues(params, ElasticNetCV())

        else:
            params = {
                'alpha': self.alpha_text.value(),
                'l1_ratio': self.enl1_ratioDoubleSpinBox.value(),
                'fit_intercept': self.enfit_interceptCheckBox.isChecked(),
                'normalize': self.ennormalizeCheckBox.isChecked(),
                'precompute': self.enprecomputeCheckBox.isChecked(),
                'max_iter': self.enmax_iterSpinBox.value(),
                'copy_X': self.encopy_XCheckBox.isChecked(),
                'tol': self.entolDoubleSpinBox.value(),
                'warm_start': self.enwarm_startCheckBox.isChecked(),
                'positive': self.enpositiveCheckBox.isChecked(),
                'selection': self.selectionComboBox.currentText(),
                'CV': self.CVCheckBox.isChecked()}
            return params, self.getChangedValues(params, ElasticNet())
Project: sportsball    Author: jgershen
def build_model(train_file, test_file, attr_file, model_out, predictions_out, algorithm='ridge'):
  classifiers = ['ridge', 'linear', 'lasso', 'rf', 'en']
  if algorithm not in classifiers:
    raise NotImplementedError("only implemented algorithms: " + str(classifiers))

  train_data = pd.read_pickle(train_file)
  attrs = read_attrs(attr_file)

  target_attr = attrs[0]
  usable_attrs = attrs[1:]

  if algorithm == 'ridge':
    clf = Ridge()
  elif algorithm == 'linear':
    clf = LinearRegression()
  elif algorithm == 'lasso':
    clf = Lasso()
  elif algorithm == 'en':
    clf = ElasticNet()
  else:
    clf = RandomForestRegressor()

  clf.fit(train_data[usable_attrs], train_data[target_attr])

  test_data = pd.read_pickle(test_file)
  predictions = clf.predict(test_data[usable_attrs])
  errors = predictions - test_data[target_attr]

  prediction_results = test_data[[target_attr] + usable_attrs].copy()
  prediction_results['predicted'] = predictions
  prediction_results.to_pickle(predictions_out)

  print "Modeling '%s'" % target_attr
  print "   Train:", train_file, '(%d examples)' % len(train_data)
  print "   Test:", test_file, '(%d examples)' % len(test_data)
  print "Algorithm:", algorithm

  if hasattr(clf, 'coef_'):
    print 'Coefficients:'
    for i,c in enumerate(clf.coef_):
      print '    %-20s' % usable_attrs[i] + ':', '%20.4f' % c

  print 'MSE  : %10.4f' % np.mean(errors ** 2)
  print 'medSE: %10.4f' % np.median(errors ** 2)
  print 'SSE  : %10.4f' % np.sum(errors ** 2)
  print 'Variance score: %.4f' % clf.score(test_data[usable_attrs], test_data[target_attr])

  pickle.dump(clf, open(model_out, 'wb'))
Project: sia-cog    Author: deepakkumar1984
def getSKLearnModel(modelName):
    if modelName == 'LinearRegression':
        model = linear_model.LinearRegression()
    elif modelName == 'BayesianRidge':
        model = linear_model.BayesianRidge()
    elif modelName == 'ARDRegression':
        model = linear_model.ARDRegression()
    elif modelName == 'ElasticNet':
        model = linear_model.ElasticNet()
    elif modelName == 'HuberRegressor':
        model = linear_model.HuberRegressor()
    elif modelName == 'Lasso':
        model = linear_model.Lasso()
    elif modelName == 'LassoLars':
        model = linear_model.LassoLars()
    elif modelName == 'Rigid':
        model = linear_model.Ridge()
    elif modelName == 'SGDRegressor':
        model = linear_model.SGDRegressor()
    elif modelName == 'SVR':
        model = SVR()
    elif modelName=='MLPClassifier':
        model = MLPClassifier()
    elif modelName=='KNeighborsClassifier':
        model = KNeighborsClassifier()
    elif modelName=='SVC':
        model = SVC()
    elif modelName=='GaussianProcessClassifier':
        model = GaussianProcessClassifier()
    elif modelName=='DecisionTreeClassifier':
        model = DecisionTreeClassifier()
    elif modelName=='RandomForestClassifier':
        model = RandomForestClassifier()
    elif modelName=='AdaBoostClassifier':
        model = AdaBoostClassifier()
    elif modelName=='GaussianNB':
        model = GaussianNB()
    elif modelName=='LogisticRegression':
        model = linear_model.LogisticRegression()
    elif modelName=='QuadraticDiscriminantAnalysis':
        model = QuadraticDiscriminantAnalysis()

    return model