Python sklearn.linear_model module: Lasso() example source code

The following 50 code examples, extracted from open-source Python projects, illustrate how to use sklearn.linear_model.Lasso().
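Before the project snippets, here is a minimal, self-contained sketch of the basic Lasso workflow (fit, inspect coef_, predict); the synthetic data and the alpha value are illustrative assumptions, not taken from any project below.

import numpy as np
from sklearn.linear_model import Lasso

rng = np.random.RandomState(0)
X = rng.randn(100, 10)
true_coef = np.zeros(10)
true_coef[:3] = [1.5, -2.0, 3.0]             # only three informative features
y = X.dot(true_coef) + 0.1 * rng.randn(100)  # noisy linear target

model = Lasso(alpha=0.1)                     # L1 penalty strength
model.fit(X, y)
print(model.coef_)                           # uninformative coefficients shrink to exactly 0
print(model.predict(X[:5]))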

Project: mlens    Author: flennerhag    | Project source | File source
def build_ensemble(**kwargs):
    """Generate ensemble."""

    ens = SuperLearner(**kwargs)
    prep = {'Standard Scaling': [StandardScaler()],
            'Min Max Scaling': [MinMaxScaler()],
            'No Preprocessing': []}

    est = {'Standard Scaling':
               [ElasticNet(), Lasso(), KNeighborsRegressor()],
           'Min Max Scaling':
               [SVR()],
           'No Preprocessing':
               [RandomForestRegressor(random_state=SEED),
                GradientBoostingRegressor()]}

    ens.add(est, prep)

    ens.add(GradientBoostingRegressor(), meta=True)

    return ens
Project: jamespy_py3    Author: jskDr    | Project source | File source
def _cv_r0( method, xM, yV, alpha, n_folds = 5, n_jobs = -1, grid_std = None, graph = True):
    """
    method can be 'Ridge', 'Lasso'
    cross validation is performed so as to generate prediction output for all input molecules
    """ 
    print(xM.shape, yV.shape)

    clf = getattr( linear_model, method)( alpha = alpha)
    kf_n = cross_validation.KFold( xM.shape[0], n_folds=n_folds, shuffle=True)
    yV_pred = cross_validation.cross_val_predict( clf, xM, yV, cv = kf_n, n_jobs = n_jobs)

    if graph:
        print('The prediction output using cross-validation is given by:')
        jutil.cv_show( yV, yV_pred, grid_std = grid_std)

    return yV_pred
Project: strategy    Author: kanghua309    | Project source | File source
def model_cross_valid(X,Y):
    seed = 7
    kfold = model_selection.KFold(n_splits=10, shuffle=True, random_state=seed)
    def build_model(model_name):
        model = model_name()
        return model
    scoring = 'neg_mean_squared_error'
    # TODO: also try random forest, boosting, LSTM, GBDT

    for model_name in [LinearRegression, ElasticNet]:
    #for model_name in [LinearRegression,Ridge,Lasso,ElasticNet,KNeighborsRegressor,DecisionTreeRegressor,SVR,RandomForestRegressor,AdaBoostRegressor,GradientBoostingRegressor]:
        model = build_model(model_name)
        results = model_selection.cross_val_score(model, X, Y, cv=kfold, scoring=scoring)
        print(model_name, results.mean())
Project: DSI-personal-reference-kit    Author: teb311    | Project source | File source
def fit_regression(X, y, regression_class=LinearRegression, regularization_const=.001):
    '''
        Given a dataset and its targets (X, y), a regression class (from scikit-learn),
        and a lambda, which is required if the regression class is Lasso or Ridge.

        X (pandas DataFrame): The data.
        y (pandas DataFrame or Series): The answers.
        regression_class (class): One of sklearn.linear_model.[LinearRegression, Ridge, Lasso]
        regularization_const: the regularization (lambda) value for Ridge or Lasso.
                              Called alpha by scikit-learn for interface reasons.

        Return:
            tuple, (the_fitted_regressor, mean(cross_val_score)).
    '''
    if regression_class is LinearRegression:
        predictor = regression_class()
    else:
        predictor = regression_class(alpha=regularization_const, normalize=True)

    predictor.fit(X, y)

    cross_scores = cross_val_score(predictor, X, y=y, scoring='neg_mean_squared_error')
    cross_scores_corrected = np.sqrt(-1 * cross_scores)  # scikit-learn returns negative MSE; take the square root for RMSE

    return (predictor, np.mean(cross_scores_corrected))
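A hypothetical usage sketch for fit_regression above, using sklearn's diabetes toy dataset and pandas (neither appears in the original snippet); it assumes a pre-1.2 scikit-learn release, since fit_regression passes the normalize= keyword that was later removed.

import pandas as pd
from sklearn.datasets import load_diabetes
from sklearn.linear_model import Lasso

diabetes = load_diabetes()
X = pd.DataFrame(diabetes.data, columns=diabetes.feature_names)
y = pd.Series(diabetes.target)

# fit a Lasso with lambda = 0.001 and report the cross-validated RMSE
predictor, mean_rmse = fit_regression(X, y, regression_class=Lasso,
                                      regularization_const=0.001)
print(mean_rmse)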
Project: poormining    Author: bowenpay    | Project source | File source
def run(self):
        """ ?? """
        # ????
        X, Y = self._fetch_data()
        clf = self.get_classifier(X, Y)
        # ??
        X, Y = self._fetch_test_data()
        res = []
        for item in range(11):
            hit_ratio = self.predict(clf, X, Y, item * 0.1)
            res.append([item * 0.1 * 100, hit_ratio * 100])

        # ??????????????
        arr = np.array(res)
        plt.plot(arr[:, 0], arr[:, 1])        # ???
        plt.plot(arr[:, 0], arr[:, 1], 'ro')  # ???
        plt.xlabel('???(%)')
        plt.ylabel('???(%)')
        plt.title('??Lasso?????????????')
        plt.show()
Project: linvpy    Author: LCAV    | Project source | File source
def lasso_regularization(matrix_a, vector_y, lambda_parameter=0):
    """
    Lasso algorithm that solves min ||y - Ax||_2^2 + lambda ||x||_1
    :param matrix_a:
    :param vector_y:
    :param lambda_parameter:
    :return: estimated x
    """

    # convert regularization parameter (sklearn considers (1/2m factor))
    reg_parameter = lambda_parameter / (2 * len(vector_y))

    # initialize model
    clf = linear_model.Lasso(reg_parameter, fit_intercept=False, normalize=False)

    # fit it
    clf.fit(matrix_a, vector_y)

    # return estimate
    x = clf.coef_

    return x
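A brief usage sketch for lasso_regularization above (the system and lambda are illustrative assumptions; an older scikit-learn is assumed because the snippet passes normalize=, removed in 1.2). Scaling sklearn's objective (1/(2m))||y - Ax||_2^2 + alpha ||x||_1 by 2m shows that alpha = lambda/(2m) recovers the stated problem.

import numpy as np

A = np.array([[1.0, 2.0],
              [3.0, 4.0],
              [5.0, 6.0]])
y = np.array([1.0, 2.0, 3.0])

x_hat = lasso_regularization(A, y, lambda_parameter=0.5)  # sparse estimate of x
print(x_hat)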
Project: jamespy_py3    Author: jskDr    | Project source | File source
def _cv_r0( method, xM, yV, alpha, n_splits = 5, n_jobs = -1, grid_std = None, graph = True):
    """
    method can be 'Ridge', 'Lasso'
    cross validation is performed so as to generate prediction output for all input molecules
    """ 
    print(xM.shape, yV.shape)

    clf = getattr( linear_model, method)( alpha = alpha)
    kf_n_c = model_selection.KFold( n_splits = n_splits, shuffle=True)
    kf_n = kf_n_c.split( xM)
    yV_pred = model_selection.cross_val_predict( clf, xM, yV, cv = kf_n, n_jobs = n_jobs)

    if graph:
        print('The prediction output using cross-validation is given by:')
        jutil.cv_show( yV, yV_pred, grid_std = grid_std)

    return yV_pred
Project: jamespy_py3    Author: jskDr    | Project source | File source
def cv( method, xM, yV, alpha, n_splits = 5, n_jobs = -1, grid_std = None, graph = True, shuffle = True):
    """
    method can be 'Ridge', 'Lasso'
    cross validation is performed so as to generate prediction output for all input molecules
    """ 
    print(xM.shape, yV.shape)

    clf = getattr( linear_model, method)( alpha = alpha)
    kf_n_c = model_selection.KFold( n_splits=n_splits, shuffle=shuffle)
    kf_n = kf_n_c.split( xM)
    yV_pred = model_selection.cross_val_predict( clf, xM, yV, cv = kf_n, n_jobs = n_jobs)

    if graph:
        print('The prediction output using cross-validation is given by:')
        jutil.cv_show( yV, yV_pred, grid_std = grid_std)

    return yV_pred
Project: jamespy_py3    Author: jskDr    | Project source | File source
def mlr_val_vseq_lasso( RM, yE, v_seq, alpha = .5, disp = True, graph = True):
    """
    Validation is peformed using vseq indexed values.
    """
    org_seq = list(range( len( yE)))
    t_seq = [x for x in org_seq if x not in v_seq]

    RMt, yEt = RM[ t_seq, :], yE[ t_seq, 0]
    RMv, yEv = RM[ v_seq, :], yE[ v_seq, 0]

    clf = linear_model.Lasso( alpha = alpha)
    clf.fit( RMt, yEt)

    if disp: print('Training result')
    mlr_show( clf, RMt, yEt, disp = disp, graph = graph)

    if disp: print('Validation result')
    r_sqr, RMSE = mlr_show( clf, RMv, yEv, disp = disp, graph = graph)

    #if r_sqr < 0:
    #   print 'v_seq:', v_seq, '--> r_sqr = ', r_sqr

    return r_sqr, RMSE
Project: jamespy_py3    Author: jskDr    | Project source | File source
def cv_SVR( xM, yV, svr_params, n_splits = 5, n_jobs = -1, grid_std = None, graph = True, shuffle = True):
    """
    method can be 'Ridge', 'Lasso'
    cross validation is performed so as to generate prediction output for all input molecules
    """ 
    print(xM.shape, yV.shape)

    clf = svm.SVR( **svr_params)
    kf_n_c = model_selection.KFold( n_splits=n_splits, shuffle=shuffle)
    kf_n = kf_n_c.split( xM)
    yV_pred = model_selection.cross_val_predict( clf, xM, yV, cv = kf_n, n_jobs = n_jobs)

    if graph:
        print('The prediction output using cross-validation is given by:')
        kutil.cv_show( yV, yV_pred, grid_std = grid_std)

    return yV_pred
Project: jamespy_py3    Author: jskDr    | Project source | File source
def _cv_r0( method, xM, yV, alpha, n_splits = 5, n_jobs = -1, grid_std = None, graph = True):
    """
    method can be 'Ridge', 'Lasso'
    cross validation is performed so as to generate prediction output for all input molecules
    """ 
    print(xM.shape, yV.shape)

    clf = getattr( linear_model, method)( alpha = alpha)
    kf_n_c = model_selection.KFold( n_splits = n_splits, shuffle=True)
    kf_n = kf_n_c.split( xM)
    yV_pred = model_selection.cross_val_predict( clf, xM, yV, cv = kf_n, n_jobs = n_jobs)

    if graph:
        print('The prediction output using cross-validation is given by:')
        kutil.cv_show( yV, yV_pred, grid_std = grid_std)

    return yV_pred
Project: jamespy_py3    Author: jskDr    | Project source | File source
def cv( method, xM, yV, alpha, n_splits = 5, n_jobs = -1, grid_std = None, graph = True, shuffle = True):
    """
    method can be 'Ridge', 'Lasso'
    cross validation is performed so as to generate prediction output for all input molecules
    """
    print(xM.shape, yV.shape)

    clf = getattr( linear_model, method)( alpha = alpha)
    kf_n_c = model_selection.KFold( n_splits=n_splits, shuffle=shuffle)
    kf_n = kf_n_c.split( xM)
    yV_pred = model_selection.cross_val_predict( clf, xM, yV, cv = kf_n, n_jobs = n_jobs)

    if graph:
        print('The prediction output using cross-validation is given by:')
        kutil.cv_show( yV, yV_pred, grid_std = grid_std)

    return yV_pred
Project: jamespy_py3    Author: jskDr    | Project source | File source
def cvLOO( method, xM, yV, alpha, n_jobs = -1, grid_std = None, graph = True):
    """
    method can be 'Ridge', 'Lasso'
    cross validation is performed so as to generate prediction output for all input molecules
    """ 
    n_splits = xM.shape[0]

    # print(xM.shape, yV.shape)

    clf = getattr( linear_model, method)( alpha = alpha)
    kf_n = model_selection.KFold( n_splits = n_splits)
    yV_pred = model_selection.cross_val_predict( clf, xM, yV, cv = kf_n, n_jobs = n_jobs)

    if graph:
        print('The prediction output using cross-validation is given by:')
        kutil.cv_show( yV, yV_pred, grid_std = grid_std)

    return yV_pred
Project: jamespy_py3    Author: jskDr    | Project source | File source
def mlr_val_vseq_lasso( RM, yE, v_seq, alpha = .5, disp = True, graph = True):
    """
    Validation is peformed using vseq indexed values.
    """
    org_seq = list(range( len( yE)))
    t_seq = [x for x in org_seq if x not in v_seq]

    RMt, yEt = RM[ t_seq, :], yE[ t_seq, 0]
    RMv, yEv = RM[ v_seq, :], yE[ v_seq, 0]

    clf = linear_model.Lasso( alpha = alpha)
    clf.fit( RMt, yEt)

    if disp: print('Training result')
    mlr_show( clf, RMt, yEt, disp = disp, graph = graph)

    if disp: print('Validation result')
    r_sqr, RMSE = mlr_show( clf, RMv, yEv, disp = disp, graph = graph)

    #if r_sqr < 0:
    #   print 'v_seq:', v_seq, '--> r_sqr = ', r_sqr

    return r_sqr, RMSE
Project: jamespy_py3    Author: jskDr    | Project source | File source
def pd_gscv( pdr, method, xM, yV, alphas_log, colname = 'Predicted-RP', fname = 'sheet/rafa36795_cxcalc_prp1000.csv'):
    """
    This run grid search, perform cross-validation for plotting and save the predicted values,
    """

    print("1. Searching the best hyper-parameter by a grid method.")
    gr = jgrid.gs( method, xM, yV, alphas_log)
    print(gr.grid_scores_)
    print("Best alpha:", gr.best_params_['alpha'])

    print("2. Predicting the property using the best hyper-parameter and show a x-y plot")
    yV_pred = jgrid.cv( 'Lasso', xM, yV, alpha = gr.best_params_['alpha'], grid_std = gr_beststd(gr))

    print("3. Saving the predicted results in crossvalidation into", fname)
    pdw = pdr.copy()
    pdw[ colname] = yV_pred.tolist()
    pdw.to_csv( fname, index = False)

    print("4. Saving the best estimator as a pkl file")
    print(gr.best_estimator_)
    externals.joblib.dump(gr.best_estimator_, fname[:-3] +  "pkl")
Project: jamespy_py3    Author: jskDr    | Project source | File source
def cv_SVR(xM, yV, svr_params, n_folds=5, n_jobs=-1, grid_std=None, graph=True, shuffle=True):
    """
    method can be 'Ridge', 'Lasso'
    cross validation is performed so as to generate prediction output for all input molecules
    """
    print(xM.shape, yV.shape)

    clf = svm.SVR(**svr_params)
    kf_n_c = model_selection.KFold(n_splits=n_folds, shuffle=True)
    kf_n = kf_n_c.split(xM)

    yV_pred = model_selection.cross_val_predict(
        clf, xM, yV.A1, cv=kf_n, n_jobs=n_jobs)

    if graph:
        print('The prediction output using cross-validation is given by:')
        jutil.cv_show(yV, yV_pred, grid_std=grid_std)

    return yV_pred
Project: jamespy_py3    Author: jskDr    | Project source | File source
def _cv_r0(method, xM, yV, alpha, n_folds=5, n_jobs=-1, grid_std=None, graph=True):
    """
    method can be 'Ridge', 'Lasso'
    cross validation is performed so as to generate prediction output for all input molecules
    """
    print(xM.shape, yV.shape)

    clf = getattr(linear_model, method)(alpha=alpha)
    kf_n_c = model_selection.KFold(n_splits=n_folds, shuffle=True)
    kf_n = kf_n_c.split(xM)
    yV_pred = model_selection.cross_val_predict(
        clf, xM, yV, cv=kf_n, n_jobs=n_jobs)

    if graph:
        print('The prediction output using cross-validation is given by:')
        jutil.cv_show(yV, yV_pred, grid_std=grid_std)

    return yV_pred
Project: jamespy_py3    Author: jskDr    | Project source | File source
def cv(method, xM, yV, alpha, n_folds=5, n_jobs=-1, grid_std=None, graph=True, shuffle=True):
    """
    method can be 'Ridge', 'Lasso'
    cross validation is performed so as to generate prediction output for all input molecules
    Return
    --------
    yV_pred
    """
    print(xM.shape, yV.shape)

    clf = getattr(linear_model, method)(alpha=alpha)
    kf_n_c = model_selection.KFold(n_splits=n_folds, shuffle=True)
    kf_n = kf_n_c.split(xM)
    yV_pred = model_selection.cross_val_predict(
        clf, xM, yV, cv=kf_n, n_jobs=n_jobs)

    if graph:
        print('The prediction output using cross-validation is given by:')
        jutil.cv_show(yV, yV_pred, grid_std=grid_std)

    return yV_pred
Project: jamespy_py3    Author: jskDr    | Project source | File source
def _cv_LOO_r0(method, xM, yV, alpha, n_jobs=-1, grid_std=None, graph=True):
    """
    method can be 'Ridge', 'Lasso'
    cross validation is performed so as to generate prediction output for all input molecules
    """
    n_folds = xM.shape[0]

    print(xM.shape, yV.shape)

    clf = getattr(linear_model, method)(alpha=alpha)
    # print("Note - shuffling is not applied because of LOO.")
    kf_n_c = model_selection.KFold(n_splits=n_folds)
    kf_n = kf_n_c.split(xM)
    yV_pred = model_selection.cross_val_predict(
        clf, xM, yV, cv=kf_n, n_jobs=n_jobs)

    if graph:
        print('The prediction output using cross-validation is given by:')
        jutil.cv_show(yV, yV_pred, grid_std=grid_std)

    return yV_pred
Project: jamespy_py3    Author: jskDr    | Project source | File source
def cv( method, xM, yV, alpha, n_folds = 5, n_jobs = -1, grid_std = None, graph = True):
    """
    method can be 'Ridge', 'Lasso'
    cross validation is performed so as to generate prediction output for all input molecules
    """ 
    print(xM.shape, yV.shape)

    clf = getattr( linear_model, method)( alpha = alpha)
    kf_n = cross_validation.KFold( xM.shape[0], n_folds=n_folds, shuffle=True)
    yV_pred = cross_validation.cross_val_predict( clf, xM, yV, cv = kf_n, n_jobs = n_jobs)

    if graph:
        print('The prediction output using cross-validation is given by:')
        jutil.cv_show( yV, yV_pred, grid_std = grid_std)

    return yV_pred
Project: jamespy_py3    Author: jskDr    | Project source | File source
def cv( method, xM, yV, alpha, n_folds = 5, n_jobs = -1, grid_std = None, graph = True, shuffle = True):
    """
    method can be 'Ridge', 'Lasso'
    cross validation is performed so as to generate prediction output for all input molecules
    """ 
    print(xM.shape, yV.shape)

    clf = getattr( linear_model, method)( alpha = alpha)
    kf_n = cross_validation.KFold( xM.shape[0], n_folds=n_folds, shuffle=shuffle)
    yV_pred = cross_validation.cross_val_predict( clf, xM, yV, cv = kf_n, n_jobs = n_jobs)

    if graph:
        print('The prediction output using cross-validation is given by:')
        jutil.cv_show( yV, yV_pred, grid_std = grid_std)

    return yV_pred
Project: jamespy_py3    Author: jskDr    | Project source | File source
def _cv_LOO_r0( method, xM, yV, alpha, n_jobs = -1, grid_std = None, graph = True):
    """
    method can be 'Ridge', 'Lasso'
    cross validation is performed so as to generate prediction output for all input molecules
    """ 
    n_folds = xM.shape[0]

    print(xM.shape, yV.shape)

    clf = getattr( linear_model, method)( alpha = alpha)
    kf_n = cross_validation.KFold( xM.shape[0], n_folds=n_folds)
    yV_pred = cross_validation.cross_val_predict( clf, xM, yV, cv = kf_n, n_jobs = n_jobs)

    if graph:
        print('The prediction output using cross-validation is given by:')
        jutil.cv_show( yV, yV_pred, grid_std = grid_std)

    return yV_pred
Project: ML-note    Author: JasonK93    | Project source | File source
def test_Lasso(*data):
    '''
    Examine how alpha affects the sparsity of the Lasso coefficients.
    :param data: (X, y) data matrix and target values
    :return: None
    '''
    X, y = data
    alphas=np.logspace(-2,2)
    zeros=[]
    for alpha in alphas:
        regr=Lasso(alpha=alpha)
        regr.fit(X,y)
        num=0
        for ele in regr.coef_:
            if abs(ele) < 1e-5:
                num += 1
        zeros.append(num)
    fig=plt.figure()
    ax=fig.add_subplot(1,1,1)
    ax.plot(alphas,zeros)
    ax.set_xlabel(r"$\alpha$")
    ax.set_xscale("log")
    ax.set_ylim(0,X.shape[1]+1)
    ax.set_ylabel("zeros in coef")
    ax.set_title("Sparsity In Lasso")
    plt.show()
Project: ML-note    Author: JasonK93    | Project source | File source
def test_Lasso_alpha(*data):
    '''
    Evaluate the test-set score over a range of alpha values.
    :param data: train_data, test_data, train_value, test_value
    :return: None
    '''

    X_train,X_test,y_train,y_test=data
    alphas=[0.01,0.02,0.05,0.1,0.2,0.5,1,2,5,10,20,50,100,200,500,1000]
    scores=[]
    for i,alpha in enumerate(alphas):
        regr = linear_model.Lasso(alpha=alpha)
        regr.fit(X_train, y_train)
        scores.append(regr.score(X_test, y_test))
    ## graph
    fig=plt.figure()
    ax=fig.add_subplot(1,1,1)
    ax.plot(alphas,scores)
    ax.set_xlabel(r"$\alpha$")
    ax.set_ylabel(r"score")
    ax.set_xscale('log')
    ax.set_title("Lasso")
    plt.show()
Project: Parallel-SGD    Author: angadgill    | Project source | File source
def test_rank_deficient_design():
    # consistency test that checks that LARS Lasso is handling rank
    # deficient input data (with rank < n_features) in the same way
    # as coordinate descent Lasso
    y = [5, 0, 5]
    for X in ([[5, 0],
               [0, 5],
               [10, 10]],

              [[10, 10, 0],
               [1e-32, 0, 0],
               [0, 0, 1]],
              ):
        # To be able to use the coefs to compute the objective function,
        # we need to turn off normalization
        lars = linear_model.LassoLars(.1, normalize=False)
        coef_lars_ = lars.fit(X, y).coef_
        obj_lars = (1. / (2. * 3.)
                    * linalg.norm(y - np.dot(X, coef_lars_)) ** 2
                    + .1 * linalg.norm(coef_lars_, 1))
        coord_descent = linear_model.Lasso(.1, tol=1e-6, normalize=False)
        coef_cd_ = coord_descent.fit(X, y).coef_
        obj_cd = ((1. / (2. * 3.)) * linalg.norm(y - np.dot(X, coef_cd_)) ** 2
                  + .1 * linalg.norm(coef_cd_, 1))
        assert_less(obj_lars, obj_cd * (1. + 1e-8))
Project: Parallel-SGD    Author: angadgill    | Project source | File source
def test_lasso_lars_vs_lasso_cd_early_stopping(verbose=False):
    # Test that LassoLars and Lasso using coordinate descent give the
    # same results when early stopping is used.
    # (test : before, in the middle, and in the last part of the path)
    alphas_min = [10, 0.9, 1e-4]
    for alpha_min in alphas_min:
        alphas, _, lasso_path = linear_model.lars_path(X, y, method='lasso',
                                                       alpha_min=alpha_min)
        lasso_cd = linear_model.Lasso(fit_intercept=False, tol=1e-8)
        lasso_cd.alpha = alphas[-1]
        lasso_cd.fit(X, y)
        error = linalg.norm(lasso_path[:, -1] - lasso_cd.coef_)
        assert_less(error, 0.01)

    alphas_min = [10, 0.9, 1e-4]
    # same test, with normalization
    for alpha_min in alphas_min:
        alphas, _, lasso_path = linear_model.lars_path(X, y, method='lasso',
                                                       alpha_min=alpha_min)
        lasso_cd = linear_model.Lasso(fit_intercept=True, normalize=True,
                                      tol=1e-8)
        lasso_cd.alpha = alphas[-1]
        lasso_cd.fit(X, y)
        error = linalg.norm(lasso_path[:, -1] - lasso_cd.coef_)
        assert_less(error, 0.01)
Project: Tback    Author: ideaplat    | Project source | File source
def run(self):
        """ ?? """
        # ????
        X, Y = self._fetch_data()
        clf = self.get_classifier(X, Y)
        # ??
        X, Y = self._fetch_test_data()
        res = []
        for item in range(11):
            hit_ratio = self.predict(clf, X, Y, item * 0.1)
            res.append([item * 0.1 * 100, hit_ratio * 100])

        # ??????????????
        arr = np.array(res)
        plt.plot(arr[:, 0], arr[:, 1])        # ???
        plt.plot(arr[:, 0], arr[:, 1], 'ro')  # ???
        plt.xlabel('???(%)')
        plt.ylabel('???(%)')
        plt.title('??Lasso?????????????')
        plt.show()
Project: Tback    Author: ideaplat    | Project source | File source
def run(self):
        """ ?? """
        # ????
        X, Y = self._fetch_data()
        clf = self.get_classifier(X, Y)
        # ??
        X, Y = self._fetch_test_data()
        res = []
        for item in range(11):
            hit_ratio = self.predict(clf, X, Y, item * 0.1)
            res.append([item * 0.1 * 100, hit_ratio * 100])

        # ??????????????
        arr = np.array(res)
        plt.plot(arr[:, 0], arr[:, 1])        # ???
        plt.plot(arr[:, 0], arr[:, 1], 'ro')  # ???
        plt.xlabel('???(%)')
        plt.ylabel('???(%)')
        plt.title('??Lasso?????????????')
        plt.show()
Project: kaggle    Author: RankingAI    | Project source | File source
def train(self):
        """"""
        start = time.time()

        extra_tr = pd.read_hdf(path_or_buf='%s/p21/eval_train.hdf' % self.InputDir, key='train')

        print('size before truncated outliers is %d ' % len(self.TrainData))
        self.TrainData = self.TrainData[(self.TrainData['logerror'] > self._low) & (self.TrainData['logerror'] < self._up)]
        #self.TrainData = self.TrainData.join(extra_tr, on='parcelid', how= 'left')
        self.TrainData = pd.concat([self.TrainData, extra_tr.drop('parcelid', axis= 1)], axis = 1)
        print('size after truncated outliers is %d ' % len(self.TrainData))

        X = self.TrainData.drop(self._l_drop_cols, axis=1)
        Y = self.TrainData['logerror']
        self._l_train_columns = X.columns
        X = X.values.astype(np.float32, copy=False)

        lr = Lasso(alpha= self._lr_alpha, max_iter= self._lr_iter, tol= 1e-4, random_state= 2017, selection= self._lr_sel)
        self._model = lr.fit(X, Y)
        end = time.time()

        print('Training iterates %d, time consumed %d ' % (self._model.n_iter_, (end - start)))

        self._f_eval_train_model = '{0}/{1}_{2}.pkl'.format(self.OutputDir, self.__class__.__name__,
                                                            datetime.now().strftime('%Y%m%d-%H:%M:%S'))
        with open(self._f_eval_train_model, 'wb') as o_file:
            pickle.dump(self._model, o_file, -1)
        o_file.close()

        #self.TrainData = pd.concat([self.TrainData, self.ValidData[self.TrainData.columns]],
        #                           ignore_index=True)  ## ignore_index will reset the index or index will be overlaped

        return
Project: sanergy-public    Author: dssg    | Project source | File source
def define_model(self):
        #if self.modeltype == "AR" :
        #    return statsmodels.tsa.ar_model.AR(max_order=self.parameters['max_order'])
        if self.modeltype == "RandomForest" :
            return ensemble.RandomForestRegressor(n_estimators=self.parameters['n_estimators'])
            #return ensemble.RandomForestClassifier(
            #    n_estimators=self.parameters['n_estimators'])
        elif self.modeltype == "LinearRegression" :
            return linear_model.LinearRegression()
        elif self.modeltype == "Lasso" :
            return linear_model.Lasso(
            alpha=self.parameters['alpha'])
        elif self.modeltype == "ElasticNet" :
            return linear_model.ElasticNet(
            alpha=self.parameters['alpha'],
            l1_ratio=self.parameters['l1_ratio'])
        elif self.modeltype == "SVR" :
            return SVR(
            C=self.parameters['C'],
            epsilon=self.parameters['epsilon'],
            kernel=self.parameters['kernel'])
        #elif self.modeltype == 'StaticModel':
        #   return StaticModel (
        #      parameters=self.parameters
        #     )
        #elif self.modeltype == 'AdvancedStaticModel':
        #   return AdvancedStaticModel (
        #       parameters=self.parameters
        #        )

        # elif self.modeltype == 'SGDRegressor' :
        #     print(self.parameters)
        #     return linear_model.SGDRegressor(
        #     loss=self.parameters['loss'],
        #     penalty=self.parameters['penalty'],
        #     l1_ratio=self.parameters['l1_ratio'])
        else:
            raise ConfigError("Unsupported model {0}".format(self.modeltype))
Project: OpenAPS    Author: medicinexlab    | Project source | File source
def lasso_regression_model(parameter_array):
    alpha_value = parameter_array[0]  # the alpha value is the first element of the parameter array
    return linear_model.Lasso(alpha=alpha_value, fit_intercept=True, normalize=True, precompute=False, copy_X=True,
                                max_iter=1000, tol=0.0001, warm_start=False, positive=False, random_state=None, selection='cyclic')

#Returns the SVR Linear Kernel model
Project: strategy    Author: kanghua309    | Project source | File source
def model_fit_and_test(TrainX,TrainY,TestX,TestY):
    def build_model(model_name):
        model = model_name()
        return model
    #for model_name in [LinearRegression, Ridge, Lasso, ElasticNet, KNeighborsRegressor, DecisionTreeRegressor, SVR,RandomForestRegressor, AdaBoostRegressor, GradientBoostingRegressor]:
    for model_name in [LinearRegression, ElasticNet]:
        model = build_model(model_name)
        model.fit(TrainX,TrainY)
        print(model_name)
        resid = model.predict(TestX) - TestY
        #print resid
        print("Residual sum of squares: %f"% np.mean(resid ** 2))
        #print model.predict(TestX)
        #print TestY
        # Explained variance score: 1 is perfect prediction
        plt.scatter(model.predict(TestX), resid);
        plt.axhline(0, color='red')
        plt.xlabel('Predicted Values')
        plt.ylabel('Residuals')
        #plt.xlim([1, 50])
        plt.show()

        print('Variance score: %.2f' % model.score(TestX, TestY))

        from statsmodels.stats.stattools import jarque_bera
        _, pvalue, _, _ = jarque_bera(resid)
        print ("Test Residuals Normal", pvalue)

        from statsmodels import regression, stats
        import statsmodels.api as sms
        import statsmodels.stats.diagnostic as smd
        # xs_with_constant = sms.add_constant(np.column_stack((X1,X2,X3,X4)))
        xs_with_constant = sms.add_constant(TestX)
        _, pvalue1, _, _ = stats.diagnostic.het_breushpagan(resid, xs_with_constant)
        print ("Test Heteroskedasticity", pvalue1)
        ljung_box = smd.acorr_ljungbox(resid, lags=10)

        #print "Lagrange Multiplier Statistics:", ljung_box[0]
        print "Test Autocorrelation P-values:", ljung_box[1]
        if any(ljung_box[1] < 0.05):
            print "The residuals are autocorrelated."
        else:
            print "The residuals are not autocorrelated."
Project: DSI-personal-reference-kit    Author: teb311    | Project source | File source
def main(dataset_size, test_proportion):
    diabetes = load_diabetes()
    X = diabetes.data[:dataset_size]
    y = diabetes.target[:dataset_size]

    fig, ax_list = plt.subplots(3, 1, figsize=(8, 6))
    plot_errors_by_lambda(X, y, test_proportion=test_proportion, regression_class=Ridge, ax=ax_list[0])
    plot_errors_by_lambda(X, y, test_proportion=test_proportion, regression_class=Lasso, ax=ax_list[1])
    plot_errors_by_lambda(X, y, test_proportion=test_proportion, regression_class=LinearRegression, ax=ax_list[2])

    plt.tight_layout()
    plt.show()
Project: mlprojects-py    Author: srinathperera    | Project source | File source
def vote_with_lr(conf, forecasts, best_model_index, y_actual):
    start = time.time()
    best_forecast = forecasts[:, best_model_index]
    forecasts = np.sort(np.delete(forecasts, best_model_index, axis=1), axis=1)
    forecasts = np.where(forecasts <=0, 0.1, forecasts)

    data_train = []

    for i in range(forecasts.shape[0]):
        f_row = forecasts[i,]
        min_diff_to_best = np.min([cal_rmsle(best_forecast[i], f) for f in f_row])
        comb = list(itertools.combinations(f_row,2))
        avg_error = scipy.stats.hmean([cal_rmsle(x,y) for (x,y) in comb])
        data_train.append([min_diff_to_best, avg_error, scipy.stats.hmean(f_row), np.median(f_row), np.std(f_row)])


    X_all = np.column_stack([np.row_stack(data_train), best_forecast])
    if conf.target_as_log:
        y_actual = transfrom_to_log(y_actual)
    # use the first 25% of the data to train the ensemble and the rest for evaluation
    no_of_training_instances = int(round(len(y_actual)*0.25))
    X_train, X_test, y_train, y_test = train_test_split(no_of_training_instances, X_all, y_actual)
    y_actual_test = y_actual[no_of_training_instances:]

    lr_model =linear_model.Lasso(alpha = 0.2)
    lr_model.fit(X_train, y_train)
    lr_forecast = lr_model.predict(X_test)
    lr_forcast_revered = retransfrom_from_log(lr_forecast)
    calculate_accuracy("vote__lr_forecast " + str(conf.command), y_actual_test, lr_forcast_revered)
    print_time_took(start, "vote_with_lr")
    return lr_forcast_revered
Project: mlprojects-py    Author: srinathperera    | Project source | File source
def get_models4ensamble(conf):
    models = []
    #models = [RFRModel(conf), DLModel(conf), LRModel(conf)]
    #models = [LRModel(conf)]
    # see http://scikit-learn.org/stable/modules/linear_model.html

    #0 was too big to run with depth set to 1, and 1 was overfitting a bit

    if conf.command == 1:
        xgb_params = {"objective": "reg:linear", "booster":"gbtree", "max_depth":3, "eta":0.1, "min_child_weight":5,
            "subsample":0.5, "nthread":4, "colsample_bytree":0.5, "num_parallel_tree":1, 'gamma':0}
    else:
        xgb_params = {"objective": "reg:linear", "booster":"gbtree", "max_depth":10, "eta":0.1, "min_child_weight":8,
            "subsample":0.5, "nthread":4, "colsample_bytree":0.5, "num_parallel_tree":1, 'gamma':0}

    #xgb_params = {"objective": "reg:linear", "booster":"gbtree", "max_depth":10, "eta":0.1, "min_child_weight":8,
    #    "subsample":0.5, "nthread":4, "colsample_bytree":0.5, "num_parallel_tree":1, 'gamma':0}

    models = [
                #DLModel(conf),

                #LRModel(conf, model=linear_model.BayesianRidge()),
                #LRModel(conf, model=linear_model.LassoLars(alpha=.1)),
                #LRModel(conf, model=linear_model.Lasso(alpha = 0.1)),
                #LRModel(conf, model=Pipeline([('poly', PolynomialFeatures(degree=3)),
                #LRModel(conf, model=linear_model.Ridge (alpha = .5))
                #   ('linear', LinearRegression(fit_intercept=False))])),
                XGBoostModel(conf, xgb_params, use_cv=True),
                LRModel(conf, model=linear_model.Lasso(alpha = 0.3)),
                RFRModel(conf, RandomForestRegressor(oob_score=True, n_jobs=4)),
                #LRModel(conf, model=linear_model.Lasso(alpha = 0.2)),
                ETRModel(conf, model=ExtraTreesRegressor(n_jobs=4)),
                #AdaBoostRModel(conf, model=AdaBoostRegressor(loss='square'))
              ]
    return models
    #return [XGBoostModel(conf, xgb_params, use_cv=True)]
Project: healthcareai-py    Author: HealthCatalyst    | Project source | File source
def lasso_regression(self, scoring_metric='neg_mean_squared_error',
                         hyperparameter_grid=None,
                         randomized_search=True,
                         number_iteration_samples=2):
        """
        A light wrapper for Sklearn's lasso regression that performs randomized search over an overridable default
        hyperparameter grid.

        Args:
            scoring_metric (str): Any sklearn scoring metric appropriate for regression
            hyperparameter_grid (dict): hyperparameters by name
            randomized_search (bool): True for randomized search (default)
            number_iteration_samples (int): Number of models to train during the randomized search for exploring the
                hyperparameter space. More may lead to a better model, but will take longer.

        Returns:
            TrainedSupervisedModel:
        """
        self.validate_regression('Lasso Regression')
        if hyperparameter_grid is None:
            hyperparameter_grid = {"fit_intercept": [True, False]}
            number_iteration_samples = 2

        algorithm = get_algorithm(Lasso,
                                  scoring_metric,
                                  hyperparameter_grid,
                                  randomized_search,
                                  number_iteration_samples=number_iteration_samples)

        trained_supervised_model = self._create_trained_supervised_model(algorithm)

        return trained_supervised_model
Project: mlens    Author: flennerhag    | Project source | File source
def build_ensemble(**kwargs):
    """Generate ensemble."""

    ens = SuperLearner(**kwargs)

    est = [ElasticNet(copy_X=False),
           Lasso(copy_X=False)]

    ens.add(est)
    ens.add(KNeighborsRegressor())

    return ens
Project: mlens    Author: flennerhag    | Project source | File source
def lasso():
    """Fit Lasso."""
    print("Fitting LAS...", end=" ", flush=True)
    time.sleep(SLEEP)
    t0 = time.time()
    ls = Lasso()
    ls.fit(X, y)
    print_time(t0, "Done", end="")
Project: mlens    Author: flennerhag    | Project source | File source
def elasticnet():
    """Fit Elastic Net."""
    print("Fitting ELN...", end=" ", flush=True)
    time.sleep(SLEEP)
    t0 = time.time()
    ls = Lasso()
    ls.fit(X, y)
    print_time(t0, "Done", end="")
Project: mlens    Author: flennerhag    | Project source | File source
def build_ensemble(kls, **kwargs):
    """Generate ensemble of class kls."""

    ens = kls(**kwargs)
    ens.add([SVR(), RandomForestRegressor(),
             GradientBoostingRegressor(), Lasso(copy_X=False),
             MLPRegressor(shuffle=False, alpha=0.001)])
    ens.add_meta(Lasso(copy_X=False))
    return ens
Project: eezzy    Author: 3Blades    | Project source | File source
def spot_check(X, y, model_type='regression'):
    if model_type == 'regression':
        models = [
            (LinearRegression(), 'Ordinary Least Squares'),
            (Ridge(alpha=0.1), 'Ridge (alpha 0.1)'),
            (Ridge(), 'Ridge (alpha 1.0)'),
            (Lasso(alpha=0.1), 'Lasso (alpha 0.1)'),
            (Lasso(), 'Lasso (alpha 1.0)'),
            (ElasticNet(alpha=0.1), 'ElasticNet (alpha 0.1)'),
            (ElasticNet(), 'ElasticNet (alpha 1.0)'),
            (DecisionTreeRegressor(), 'Decision Tree'),
            (KNeighborsRegressor(), 'K-Nearest Neighbors'),

#           (RandomForestRegressor(), 'Random Forest Regressor'),
#           (BaggingRegressor(), 'Bagging Regressor'),
#           (GradientBoostingRegressor(), 'Gradient Boosted Regression'),
#           (SVR(), 'Support Vector Regression')
        ]

    splits = 5
    scores = []

    for model, model_name in models:
        score = check_model(model, splits, X, y)
        # get average score
        scores.append(score)

    model_names = map(lambda x: x[1], models)
    for name, score in zip(model_names, scores):
        print('%s: %f' % (name, score))
Project: poormining    Author: bowenpay    | Project source | File source
def get_classifier(self, X, Y):
        """ ??Lasso??
        :param X: ????
        :param Y: ??????
        :return: ??
        """
        clf = Lasso()
        clf.fit(X, Y)
        return clf
Project: EarlyWarning    Author: wjlei1990    | Project source | File source
def train_lasso_model(_train_x, train_y, _predict_x):
    print_title("Lasso Regressor")

    train_x, predict_x = \
        standarize_feature(_train_x, _predict_x)

    reg = linear_model.LassoCV(
        precompute=True, cv=5, verbose=1, n_jobs=4)
    reg.fit(train_x, train_y)
    print("alphas: %s" % reg.alphas_)
    print("mse path: %s" % np.mean(reg.mse_path_, axis=1))

    itemindex = np.where(reg.alphas_ == reg.alpha_)
    print("itemindex: %s" % itemindex)
    _mse = np.mean(reg.mse_path_[itemindex[0], :])
    print("Best alpha using bulit-in LassoCV: %f(mse: %f)" %
          (reg.alpha_, _mse))

    alpha = reg.alpha_
    reg = linear_model.Lasso(alpha=alpha)
    reg.fit(train_x, train_y)
    n_nonzeros = (reg.coef_ != 0).sum()
    print("Non-zeros coef: %d" % n_nonzeros)
    predict_y = reg.predict(predict_x)
    train_y_pred = reg.predict(train_x)

    return {"y": predict_y, "train_y": train_y_pred, "coef": reg.coef_}
Project: kaggle-house-price    Author: motakbiri    | Project source | File source
def lasso(train ,test , label, alpha = 0.00099, max_iteration = 50000):
    lasso = Lasso(alpha = alpha , max_iter = max_iteration)
    lasso.fit(train,label)

    # prediction on training data
    y_prediction = lasso.predict(train)
    y_test = label
    print("Lasso score on training set: ", rmse(y_test, y_prediction))

    y_prediction = lasso.predict(test)
    y_prediction = np.exp(y_prediction)
    return y_prediction
Project: yellowbrick    Author: DistrictDataLabs    | Project source | File source
def test_regressor_cv(self):
        """
        Ensure only "CV" regressors are allowed
        """

        for model in (SVR, Ridge, Lasso, LassoLars, ElasticNet):
            with self.assertRaises(YellowbrickTypeError):
                alphas = AlphaSelection(model())

        for model in (RidgeCV, LassoCV, LassoLarsCV, ElasticNetCV):
            try:
                alphas = AlphaSelection(model())
            except YellowbrickTypeError:
                self.fail("could not instantiate RegressorCV on alpha selection")
Project: clue-hackathon    Author: adrinjalali    | Project source | File source
def run():
    data = load_binary()

    # Extract features
    user_feat_matrix = process_level2(data)  # X

    del user_feat_matrix['X']['user_id']
    X = user_feat_matrix['X'].values
    X[np.isnan(X)] = 0
    Y = user_feat_matrix['Y']
    Y.fillna(0, inplace=True)
    del user_feat_matrix['X_all']['user_id']
    X_all = user_feat_matrix['X_all'].values
    X_all[np.isnan(X_all)] = 0

    cols = list(Y.columns.values)
    symptoms = ['happy', 'pms', 'sad', 'sensitive_emotion', 'energized', 'exhausted',
                'high_energy', 'low_energy', 'cramps', 'headache', 'ovulation_pain',
                'tender_breasts', 'acne_skin', 'good_skin', 'oily_skin', 'dry_skin']
    with open("result.txt", 'w') as f:
        f.write("user_id,day_in_cycle,symptom,probability\n")

    for symptom in symptoms:
        print(symptom)

        pipeline = Pipeline([
            ('remove_low_variance_features', VarianceThreshold(threshold=0.0)),
            #('standard_scale', StandardScaler()),
            ('estimator', Lasso()),
        ])

        param_grid = {'estimator__alpha': [.1, .3, .5, .7, .8]}
        model = GridSearchCV(pipeline, param_grid = param_grid, n_jobs = 4,
                             verbose=2)
        model.fit(X, Y[symptom].values)  # target column for the current symptom

        print("dumping...")
        data_dir = 'data'
        cycles0 = pd.read_csv(join(data_dir, 'cycles0.csv'))
        c_length = {k:v for k,v in zip(cycles0.user_id.values, cycles0.expected_cycle_length)}
        dump(symptom, model, X_all, c_length, data['users'].user_id)
Project: linvpy    Author: LCAV    | Project source | File source
def estimate(self, a, y, initial_x=None):
        """
        :param a: MxN matrix A in the y=Ax equation
        :type a: numpy.ndarray
        :param y: M vector y in the y=Ax equation
        :type y: numpy.ndarray
        :param initial_x: N vector of an initial solution
        :type initial_x: numpy.ndarray
        :return: best estimation of the N vector x in the y=Ax equation
        :rtype: numpy.ndarray

        :Example:

        >>> import numpy as np
        >>> import linvpy as lp

        >>> a = np.matrix([[1, 2], [3, 4], [5, 6]])
        >>> y = np.array([1, 2, 3])

        >>> m = lp.MEstimator()
        >>> m.estimate(a,y)
        array([ -2.95552481e-16,   5.00000000e-01])

        >>> m_ = lp.MEstimator(loss_function=lp.Bisquare, clipping=2.23, \
        regularization=lp.Lasso(), lamb=3)
        >>> initial_solution = np.array([1, 2])
        >>> m_.estimate(a, y, initial_x=initial_solution)
        array([ 0.,  0.])
        """
        return self.irls(a, y, initial_x)
Project: sportsball    Author: jgershen    | Project source | File source
def build_model(train_file, attr_file, model_out, algorithm='ridge'):
  classifiers = ['ridge', 'linear', 'lasso', 'rf', 'en']
  if algorithm not in classifiers:
    raise NotImplementedError("only implemented algorithms: " + str(classifiers))

  train_data = pd.read_pickle(train_file)

  attrs = read_attrs(attr_file)
  target_attr = attrs[0]
  usable_attrs = attrs[1:]

  if algorithm == 'ridge':
    clf = Ridge()
  elif algorithm == 'linear':
    clf = LinearRegression()
  elif algorithm == 'lasso':
    clf = Lasso()
  elif algorithm == 'en':
    clf = ElasticNet()
  else:
    clf = RandomForestRegressor()

  logger.debug("Modeling '%s'", target_attr)
  logger.debug("    train set (%d): %s", len(train_data), train_file)
  logger.debug("  Algorithm: %s", algorithm)
  if hasattr(clf, 'coef_'):
    logger.debug('Coefficients:')
    for i,c in enumerate(clf.coef_):
      logger.debug('    %-20s' % usable_attrs[i] + ':', '%20.4f' % c)
  clf.fit(train_data[usable_attrs], train_data[target_attr])

  pickle.dump(clf, open(model_out, 'wb'))
Project: jamespy_py3    Author: jskDr    | Project source | File source
def gs_Lasso( xM, yV, alphas_log = (-1, 1, 9), n_splits=5, n_jobs = -1):

    print(xM.shape, yV.shape)

    clf = linear_model.Lasso()
    #params = {'alpha': np.logspace(1, -1, 9)}
    params = {'alpha': np.logspace( *alphas_log)}
    kf5_c = model_selection.KFold( n_splits = n_splits, shuffle=True)
    #kf5 = kf5_c.split( xM)
    gs = model_selection.GridSearchCV( clf, params, scoring = 'r2', cv = kf5_c, n_jobs = n_jobs)

    gs.fit( xM, yV)

    return gs
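A hypothetical call to gs_Lasso above; xM and yV are random placeholder data following the snippet's naming convention.

import numpy as np

xM = np.random.randn(50, 8)   # illustrative feature matrix
yV = np.random.randn(50)      # illustrative target vector

gs = gs_Lasso(xM, yV, alphas_log=(-3, 1, 9), n_splits=5)
print(gs.best_params_['alpha'], gs.best_score_)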
Project: jamespy_py3    Author: jskDr    | Project source | File source
def gs_Lasso_norm( xM, yV, alphas_log = (-1, 1, 9)):

    print(xM.shape, yV.shape)

    clf = linear_model.Lasso( normalize = True)
    #params = {'alpha': np.logspace(1, -1, 9)}
    params = {'alpha': np.logspace( *alphas_log)}
    kf5_c = model_selection.KFold( n_splits = 5, shuffle=True)
    #kf5 = kf5_c.split( xM)
    gs = model_selection.GridSearchCV( clf, params, scoring = 'r2', cv = kf5_c, n_jobs = -1)

    gs.fit( xM, yV)

    return gs