我们从Python开源项目中,提取了以下32个代码示例,用于说明如何使用sklearn.linear_model.ElasticNet()。
def build_ensemble(**kwargs):
    """Build a two-layer SuperLearner with per-preprocessing estimator groups."""
    ensemble = SuperLearner(**kwargs)
    # Map each preprocessing-case name to its transformer pipeline.
    preprocessing = {
        'Standard Scaling': [StandardScaler()],
        'Min Max Scaling': [MinMaxScaler()],
        'No Preprocessing': [],
    }
    # Base learners grouped by the preprocessing case they run under.
    base_learners = {
        'Standard Scaling': [ElasticNet(), Lasso(), KNeighborsRegressor()],
        'Min Max Scaling': [SVR()],
        'No Preprocessing': [RandomForestRegressor(random_state=SEED),
                             GradientBoostingRegressor()],
    }
    ensemble.add(base_learners, preprocessing)
    # A gradient-boosting meta learner folds the base predictions together.
    ensemble.add(GradientBoostingRegressor(), meta=True)
    return ensemble
def model_cross_valid(X, Y):
    """10-fold cross-validate candidate regressors on (X, Y).

    Prints each model class with its mean negative-MSE across folds.
    """
    seed = 7
    # BUG FIX: random_state is only honoured with shuffle=True; recent
    # scikit-learn raises ValueError for random_state with shuffle=False.
    kfold = model_selection.KFold(n_splits=10, shuffle=True, random_state=seed)

    def build_model(model_class):
        # Instantiate with default hyper-parameters.
        return model_class()

    scoring = 'neg_mean_squared_error'
    # + random forest / boost / lstm / gbdt candidates could be added below.
    for model_name in [LinearRegression, ElasticNet]:
        # for model_name in [LinearRegression, Ridge, Lasso, ElasticNet, KNeighborsRegressor, DecisionTreeRegressor, SVR, RandomForestRegressor, AdaBoostRegressor, GradientBoostingRegressor]:
        model = build_model(model_name)
        results = model_selection.cross_val_score(model, X, Y, cv=kfold,
                                                  scoring=scoring)
        print(model_name, results.mean())
def fit_enet(train_X, train_y, test_X):
    """
    Use linear regression to predict. Elastic net is LR with L1 and L2
    regularisation.
    :param train_X: training features
    :param train_y: training targets
    :param test_X: test features
    :return: (model description string, train predictions, test predictions)
    """
    enet = ElasticNet()
    enet.fit(train_X, train_y)
    # BUG FIX: the original interpolated pprint(enet.coef_); pprint prints to
    # stdout and returns None, so the description always read "coefs None".
    # Embed the coefficients directly instead.
    model = "ElasticNet int %.2f coefs %s" % (enet.intercept_, enet.coef_)
    yhat_train = enet.predict(train_X)
    yhat_test = enet.predict(test_X)
    return model, yhat_train, yhat_test
def train_EN_model(_train_x, train_y, _predict_x):
    """Select (l1_ratio, alpha) via ElasticNetCV, refit, and predict.

    Returns a dict with test predictions ("y"), train predictions
    ("train_y") and the fitted coefficients ("coef").
    """
    print_title("ElasticNet")
    train_x, predict_x = \
        standarize_feature(_train_x, _predict_x)

    #l1_ratios = [1e-4, 1e-3, 1e-2, 1e-1]
    #l1_ratios = [1e-5, 1e-4, 1e-3]
    l1_ratios = [0.9, 0.92, 0.95, 0.97, 0.99]
    #l1_ratios = [0.5]

    # BUG FIX: starting at 1 left best_l1_ratio/best_alpha unbound (NameError
    # below) whenever every fold MSE was >= 1.
    min_mse = float("inf")
    for r in l1_ratios:
        t1 = time.time()
        reg_en = linear_model.ElasticNetCV(
            l1_ratio=r, cv=5, n_jobs=4, verbose=1, precompute=True)
        reg_en.fit(train_x, train_y)
        n_nonzeros = (reg_en.coef_ != 0).sum()
        # Mean CV error at the alpha ElasticNetCV selected.
        _mse = np.mean(reg_en.mse_path_, axis=1)[
            np.where(reg_en.alphas_ == reg_en.alpha_)[0][0]]
        if _mse < min_mse:
            min_mse = _mse
            best_l1_ratio = r
            best_alpha = reg_en.alpha_
        t2 = time.time()
        print("ratio(%e) -- n: %d -- alpha: %f -- mse: %f -- "
              "time: %.2f sec" % (r, n_nonzeros, reg_en.alpha_, _mse, t2 - t1))
    print("Best l1_ratio and alpha: %f, %f" % (best_l1_ratio, best_alpha))

    # Refit on the full training set with the selected hyper-parameters.
    reg = linear_model.ElasticNet(l1_ratio=best_l1_ratio, alpha=best_alpha)
    reg.fit(train_x, train_y)
    predict_y = reg.predict(predict_x)
    train_y_pred = reg.predict(train_x)
    return {"y": predict_y, "train_y": train_y_pred, "coef": reg.coef_}
def connectWidgets(self):
    """Seed the ElasticNet dialog widgets with sklearn default values."""
    defaults = ElasticNet()
    self.minalpha_spin.setValue(0.0000001)
    self.maxalpha_spin.setValue(0.01)
    self.nalpha_spin.setValue(100)
    self.enl1_ratioLineEdit.setText('0.1, 0.5, 0.7, 0.9, 0.95, 0.99, 1.0')
    # List widgets: select the item whose text matches the sklearn default.
    fit_items = self.enfit_intercept_list.findItems(
        str(defaults.fit_intercept), QtCore.Qt.MatchExactly)
    self.enfit_intercept_list.setCurrentItem(fit_items[0])
    norm_items = self.ennormalize_list.findItems(
        str(defaults.normalize), QtCore.Qt.MatchExactly)
    self.ennormalize_list.setCurrentItem(norm_items[0])
    #self.enprecomputeCheckBox.setChecked(defaults.precompute)
    self.enmax_iterLineEdit.setText(str(defaults.max_iter))
    #self.encopy_XCheckBox.setChecked(defaults.copy_X)
    self.entolLineEdit.setText(str(defaults.tol))
    warm_items = self.enwarm_start_list.findItems(
        str(defaults.warm_start), QtCore.Qt.MatchExactly)
    self.enwarm_start_list.setCurrentItem(warm_items[0])
    pos_items = self.enpositive_list.findItems(
        str(defaults.positive), QtCore.Qt.MatchExactly)
    self.enpositive_list.setCurrentItem(pos_items[0])
    #self.setComboBox(self.enselectionComboBox, ['cyclic', 'random'])
    #self.defaultComboItem(self.enselectionComboBox, defaults.selection)
def define_model(self):
    """Instantiate the estimator selected by self.modeltype.

    Hyper-parameters are read lazily from self.parameters, so only the
    chosen model's keys need to be present.  Raises ConfigError for an
    unknown model type.
    """
    params = self.parameters
    #if self.modeltype == "AR":
    #    return statsmodels.tsa.ar_model.AR(max_order=params['max_order'])
    builders = {
        "RandomForest": lambda: ensemble.RandomForestRegressor(
            n_estimators=params['n_estimators']),
        #"RandomForest": lambda: ensemble.RandomForestClassifier(
        #    n_estimators=params['n_estimators']),
        "LinearRegression": lambda: linear_model.LinearRegression(),
        "Lasso": lambda: linear_model.Lasso(
            alpha=params['alpha']),
        "ElasticNet": lambda: linear_model.ElasticNet(
            alpha=params['alpha'],
            l1_ratio=params['l1_ratio']),
        "SVR": lambda: SVR(
            C=params['C'],
            epsilon=params['epsilon'],
            kernel=params['kernel']),
        #elif self.modeltype == 'StaticModel':
        #    return StaticModel(parameters=params)
        #elif self.modeltype == 'AdvancedStaticModel':
        #    return AdvancedStaticModel(parameters=params)
        # "SGDRegressor": lambda: linear_model.SGDRegressor(
        #     loss=params['loss'], penalty=params['penalty'],
        #     l1_ratio=params['l1_ratio']),
    }
    try:
        build = builders[self.modeltype]
    except KeyError:
        raise ConfigError("Unsupported model {0}".format(self.modeltype))
    return build()
def test_n_clusters():
    # Exercise Birch's n_clusters parameter in its three accepted forms.
    X, y = make_blobs(n_samples=100, centers=10)

    # An integer n_clusters performs global clustering down to that count.
    birch_int = Birch(n_clusters=10)
    birch_int.fit(X)
    assert_greater(len(birch_int.subcluster_centers_), 10)
    assert_equal(len(np.unique(birch_int.labels_)), 10)

    # Passing an AgglomerativeClustering instance must match the integer
    # form, since that is the global clusterer Birch uses internally.
    agg = AgglomerativeClustering(n_clusters=10)
    birch_agg = Birch(n_clusters=agg)
    birch_agg.fit(X)
    assert_array_equal(birch_int.subcluster_labels_, birch_agg.subcluster_labels_)
    assert_array_equal(birch_int.labels_, birch_agg.labels_)

    # A non-clusterer estimator is rejected at fit time.
    bad_model = ElasticNet()
    birch_bad = Birch(n_clusters=bad_model)
    assert_raises(ValueError, birch_bad.fit, X)

    # A huge threshold yields too few subclusters and triggers a warning.
    birch_warn = Birch(threshold=10000.)
    assert_warns(UserWarning, birch_warn.fit, X)
def model_fit_and_test(TrainX, TrainY, TestX, TestY):
    """Fit each candidate regressor, plot test residuals, and run residual
    diagnostics (normality, heteroskedasticity, autocorrelation)."""
    def build_model(model_class):
        # Instantiate with default hyper-parameters.
        return model_class()

    #for model_name in [LinearRegression, Ridge, Lasso, ElasticNet, KNeighborsRegressor, DecisionTreeRegressor, SVR, RandomForestRegressor, AdaBoostRegressor, GradientBoostingRegressor]:
    for model_name in [LinearRegression, ElasticNet]:
        model = build_model(model_name)
        model.fit(TrainX, TrainY)
        print(model_name)
        resid = model.predict(TestX) - TestY
        #print resid
        print("Residual sum of squares: %f" % np.mean(resid ** 2))
        #print model.predict(TestX)
        #print TestY
        # Explained variance score: 1 is perfect prediction
        plt.scatter(model.predict(TestX), resid)
        plt.axhline(0, color='red')
        plt.xlabel('Predicted Values')
        plt.ylabel('Residuals')
        #plt.xlim([1, 50])
        plt.show()
        print('Variance score: %.2f' % model.score(TestX, TestY))

        from statsmodels.stats.stattools import jarque_bera
        _, pvalue, _, _ = jarque_bera(resid)
        print("Test Residuals Normal", pvalue)

        from statsmodels import regression, stats
        import statsmodels.api as sms
        import statsmodels.stats.diagnostic as smd
        # xs_with_constant = sms.add_constant(np.column_stack((X1,X2,X3,X4)))
        xs_with_constant = sms.add_constant(TestX)
        _, pvalue1, _, _ = stats.diagnostic.het_breushpagan(resid, xs_with_constant)
        print("Test Heteroskedasticity", pvalue1)

        ljung_box = smd.acorr_ljungbox(resid, lags=10)
        #print "Lagrange Multiplier Statistics:", ljung_box[0]
        # BUG FIX: the lines below were Python 2 print *statements* — a
        # SyntaxError under Python 3, which the rest of this function uses.
        print("Test Autocorrelation P-values:", ljung_box[1])
        if any(ljung_box[1] < 0.05):
            print("The residuals are autocorrelated.")
        else:
            print("The residuals are not autocorrelated.")
def build_ensemble(**kwargs):
    """Generate ensemble."""
    ensemble = SuperLearner(**kwargs)
    # First layer: two linear models sharing input arrays (copy_X=False).
    base_learners = [ElasticNet(copy_X=False), Lasso(copy_X=False)]
    ensemble.add(base_learners)
    # Second layer: nearest-neighbour regression over the base predictions.
    ensemble.add(KNeighborsRegressor())
    return ensemble
def get_regression_coefs(self, category, clf=None):
    '''
    Computes regression score of tdfidf transformed features

    Parameters
    ----------
    category : str
        category name to score
    clf : sklearn regressor, optional
        Defaults to a fresh ElasticNet per call.
        (BUG FIX: the default was previously a single ElasticNet() created
        at definition time, so every call relying on the default fitted and
        mutated the same shared instance.)

    Returns
    -------
    coefficient array
    '''
    if clf is None:
        clf = ElasticNet()
    self._fit_tfidf_model(category, clf)
    return clf.coef_
def spot_check(X, y):
    # NOTE(review): `type` is not defined in this function, so this compares
    # the *builtin* `type` to a string (always False) unless a module-level
    # variable named `type` shadows it elsewhere in the file — confirm where
    # `type` is meant to come from (it looks like an intended parameter).
    if type == 'regression':
        # Candidate regressors paired with display names.
        models = [
            (LinearRegression(), 'Ordinary Least Squares'),
            (Ridge(alpha=0.1), 'Ridge (alpha 0.1)'),
            (Ridge(), 'Ridge (alpha 1.0)'),
            (Lasso(alpha=0.1), 'Lasso (alpha 0.1)'),
            (Lasso(), 'Lasso (alpha 1.0)'),
            (ElasticNet(alpha=0.1), 'ElasticNet (alpha 0.1)'),
            (ElasticNet(), 'ElasticNet (alpha 1.0)'),
            (DecisionTreeRegressor(), 'Decision Tree'),
            (KNeighborsRegressor(), 'K-Nearest Neighbors'),
            # (RandomForestRegressor(), 'Random Forest Regressor'),
            # (BaggingRegressor(), 'Bagging Regressor'),
            # (GradientBoostingRegressor(), 'Gradient Bosted Regression'),
            # (SVR(), 'Support Vector Regression')
        ]
        splits = 5
        scores = []
        # Score each candidate with the shared cross-validation helper.
        for model, model_name in models:
            score = check_model(model, splits, X, y)
            # get average score
            scores.append(score)
        model_names = map(lambda x: x[1], models)
        for name, score in zip(model_names, scores):
            print('%s: %f' % (name, score))
def test_regressor_cv(self):
    """
    Ensure only "CV" regressors are allowed
    """
    # Plain regressors must be rejected outright.
    non_cv_models = (SVR, Ridge, Lasso, LassoLars, ElasticNet)
    for model in non_cv_models:
        with self.assertRaises(YellowbrickTypeError):
            alphas = AlphaSelection(model())

    # CV variants must be accepted without raising.
    cv_models = (RidgeCV, LassoCV, LassoLarsCV, ElasticNetCV)
    for model in cv_models:
        try:
            alphas = AlphaSelection(model())
        except YellowbrickTypeError:
            self.fail("could not instantiate RegressorCV on alpha selection")
def connectWidgets(self):
    """Populate the ElasticNet and ElasticNetCV panels with the sklearn
    default hyper-parameter values."""
    self.elasticNetCVGroupBox.setHidden(True)
    net_defaults = ElasticNet()
    cv_defaults = ElasticNetCV()

    # --- plain ElasticNet panel ---
    self.alpha_text.setText(str(net_defaults.alpha))
    self.enl1_ratioDoubleSpinBox.setValue(net_defaults.l1_ratio)
    self.enfit_interceptCheckBox.setChecked(net_defaults.fit_intercept)
    self.ennormalizeCheckBox.setChecked(net_defaults.normalize)
    self.enprecomputeCheckBox.setChecked(net_defaults.precompute)
    self.enmax_iterSpinBox.setValue(net_defaults.max_iter)
    self.encopy_XCheckBox.setChecked(net_defaults.copy_X)
    self.entolDoubleSpinBox.setValue(net_defaults.tol)
    self.enwarm_startCheckBox.setChecked(net_defaults.warm_start)
    self.enpositiveCheckBox.setChecked(net_defaults.positive)
    self.setComboBox(self.enselectionComboBox, ['cyclic', 'random'])
    self.defaultComboItem(self.enselectionComboBox, net_defaults.selection)

    # --- ElasticNetCV panel ---
    self.l1_ratioDoubleSpinBox.setValue(cv_defaults.l1_ratio)
    self.epsDoubleSpinBox.setValue(cv_defaults.eps)
    self.n_alphasSpinBox.setValue(cv_defaults.n_alphas)
    self.alphasLineEdit.setText('None')
    self.fit_interceptCheckBox.setChecked(cv_defaults.fit_intercept)
    self.normalizeCheckBox.setChecked(cv_defaults.normalize)
    self.setComboBox(self.precomputeComboBox,
                     ['True', 'False', 'auto', 'array-like'])
    self.defaultComboItem(self.precomputeComboBox, cv_defaults.precompute)
    self.max_iterSpinBox.setValue(cv_defaults.max_iter)
    self.tolDoubleSpinBox.setValue(cv_defaults.tol)
    self.cVSpinBox.setValue(3)
    self.copy_XCheckBox.setChecked(cv_defaults.copy_X)
    self.verboseCheckBox.setChecked(cv_defaults.verbose)
    self.n_jobsSpinBox.setValue(cv_defaults.n_jobs)
    self.positiveCheckBox.setChecked(cv_defaults.positive)
    self.setComboBox(self.selectionComboBox, ['cyclic', 'random'])
    self.defaultComboItem(self.selectionComboBox, cv_defaults.selection)
def build_model(train_file, attr_file, model_out, algorithm='ridge'):
    """Train the selected regressor on a pickled DataFrame and pickle it out.

    The attribute file's first entry names the target column; the rest are
    feature columns.  Raises NotImplementedError for unknown algorithms.
    """
    classifiers = ['ridge', 'linear', 'lasso', 'rf', 'en']
    if algorithm not in classifiers:
        raise NotImplementedError("only implemented algorithms: " + str(classifiers))

    train_data = pd.read_pickle(train_file)
    attrs = read_attrs(attr_file)
    target_attr = attrs[0]
    usable_attrs = attrs[1:]

    if algorithm == 'ridge':
        clf = Ridge()
    elif algorithm == 'linear':
        clf = LinearRegression()
    elif algorithm == 'lasso':
        clf = Lasso()
    elif algorithm == 'en':
        clf = ElasticNet()
    else:
        clf = RandomForestRegressor()

    logger.debug("Modeling '%s'", target_attr)
    logger.debug("  train set (%d): %s", len(train_data), train_file)
    logger.debug("  Algorithm: %s", algorithm)

    clf.fit(train_data[usable_attrs], train_data[target_attr])

    # BUG FIX: this block previously ran *before* fit(), when coef_ does not
    # exist yet, so coefficients were never logged.  Also pass the values as
    # lazy %-style args instead of extra positionals, which logging would
    # fail to format.
    if hasattr(clf, 'coef_'):
        logger.debug('Coefficients:')
        for i, c in enumerate(clf.coef_):
            logger.debug('  %-20s: %20.4f', usable_attrs[i], c)

    pickle.dump(clf, open(model_out, 'wb'))
def gs_ElasticNet(xM, yV, en_params):
    """Grid-search ElasticNet over en_params with shuffled 5-fold CV (r2).

    Returns the fitted GridSearchCV object.
    """
    print(xM.shape, yV.shape)
    clf = linear_model.ElasticNet()
    # Pass the splitter object itself as cv; the original also materialized
    # kf5_c.split(xM) into an unused local, which is removed here.
    kf5_c = model_selection.KFold(n_splits=5, shuffle=True)
    gs = model_selection.GridSearchCV(clf, en_params, scoring='r2',
                                      cv=kf5_c, n_jobs=-1)
    gs.fit(xM, yV)
    return gs
def gs_ElasticNet(xM, yV, en_params):
    """Grid-search ElasticNet over en_params with shuffled 5-fold CV (r2).

    Returns the fitted GridSearchCV object.
    """
    print(xM.shape, yV.shape)
    clf = linear_model.ElasticNet()
    # BUG FIX: model_selection.KFold takes n_splits, not n_folds — the
    # latter belonged to the removed sklearn.cross_validation.KFold and
    # raises TypeError here.
    kf5_c = model_selection.KFold(n_splits=5, shuffle=True)
    kf5 = kf5_c.split(xM)
    gs = model_selection.GridSearchCV(clf, en_params, scoring='r2',
                                      cv=kf5, n_jobs=-1)
    gs.fit(xM, yV)
    return gs
def gen_WR_pilot_ch(self, pilot_SNRdB, alpha_l1r=0, model="Ridge"):
    """
    The reception process with pilot channel estimation is conducted.
    """
    n_pilot = self.Npilot
    snr_pilot = db2var(pilot_SNRdB)

    # Generate BPSK pilot symbols and the matching transmit matrix.
    BPSK, s_a, x_flat_a, x_a = gen_BPSK(n_pilot, self.Nt)
    # H_a = gen_H(self.Nr, self.Nt)
    # H_a = self.H_a

    # Pilots received through the stored channel at the pilot SNR.
    y_a = gen_Rx(self.Nr, n_pilot, snr_pilot, self.H_a, x_a)
    yT_a = y_a.T
    # print(x_a.shape, yT_a.shape)

    # Estimate the receive filter by regression: ElasticNet takes a pair
    # (alpha, l1_ratio); Ridge/Lasso take a single regularization strength.
    #lm = linear_model.Ridge( alpha)
    if model == "ElasticNet":
        estimator = linear_model.ElasticNet(alpha_l1r[0], alpha_l1r[1])
    else:
        estimator = getattr(linear_model, model)(alpha_l1r)
    estimator.fit(yT_a, x_a)
    self.W_a = estimator.coef_
    # print( "np.dot( W_a, H_a) =", np.dot( self.W_a, self.H_a))

    self.gen_Decoding()
def gen_WR_pilot_only(self, alpha_l1r=0):
    """
    yT_a and x_a was prepared already.
    Now, W_a is calculated using alpha and then, decode data.
    For linear regression, alpha_l1r should not be specified except 0.
    """
    yT_a = self.rx_p["yT_a"]
    x_a = self.rx_p["x_a"]

    # alpha == 0 degenerates to ordinary least squares.
    model_name = "LinearRegression" if alpha_l1r == 0 else self.model

    if model_name == "LinearRegression":
        estimator = linear_model.LinearRegression()
    elif model_name == "ElasticNet":
        # ElasticNet takes a pair: (alpha, l1_ratio).
        estimator = linear_model.ElasticNet(alpha_l1r[0], alpha_l1r[1])
    else:
        # Either Ridge or Lasso: a single regularization strength.
        estimator = getattr(linear_model, model_name)(alpha_l1r)

    estimator.fit(yT_a, x_a)
    self.W_a = estimator.coef_
    # print( "np.dot( W_a, H_a) =", np.dot( self.W_a, self.H_a))

    self.gen_Decoding()
def gs_ElasticNet(xM, yV, en_params):
    """Grid-search ElasticNet over en_params using shuffled 5-fold CV (r2)."""
    print(xM.shape, yV.shape)
    estimator = linear_model.ElasticNet()
    # Materialize the fold iterator once; GridSearchCV consumes it one time.
    splitter = model_selection.KFold(n_splits=5, shuffle=True)
    folds = splitter.split(xM)
    search = model_selection.GridSearchCV(
        estimator, en_params, scoring='r2', cv=folds, n_jobs=-1)
    search.fit(xM, yV)
    return search
def gs_ElasticNet(xM, yV, en_params):
    """Grid-search ElasticNet with shuffled 5-fold CV (legacy sklearn API)."""
    print(xM.shape, yV.shape)
    estimator = linear_model.ElasticNet()
    # Legacy cross_validation.KFold takes the sample count directly.
    folds = cross_validation.KFold(xM.shape[0], n_folds=5, shuffle=True)
    search = grid_search.GridSearchCV(
        estimator, en_params, scoring='r2', cv=folds, n_jobs=-1)
    search.fit(xM, yV)
    return search
def getModels():
    """Return the names of every model supported by getSKLearnModel.

    Note: 'Rigid' is kept verbatim — callers map it to linear_model.Ridge.
    """
    return [
        "LinearRegression",
        "BayesianRidge",
        "ARDRegression",
        "ElasticNet",
        "HuberRegressor",
        "Lasso",
        "LassoLars",
        "Rigid",
        "SGDRegressor",
        "SVR",
        "MLPClassifier",
        "KNeighborsClassifier",
        "SVC",
        "GaussianProcessClassifier",
        "DecisionTreeClassifier",
        "RandomForestClassifier",
        "AdaBoostClassifier",
        "GaussianNB",
        "LogisticRegression",
        "QuadraticDiscriminantAnalysis",
    ]
def test_ElasticNet(*data):
    '''
    test for Elastic Net
    :param data: train_data, test_data, train_value, test_value
    :return: None
    '''
    X_train, X_test, y_train, y_test = data
    regr = linear_model.ElasticNet()
    regr.fit(X_train, y_train)
    # Report fitted parameters, test-set MSE and R^2.
    print('Coefficients:{0}, intercept {1}'.format(regr.coef_, regr.intercept_))
    residuals = regr.predict(X_test) - y_test
    print("Residual sum of squares: {0}".format(np.mean(residuals ** 2)))
    print('Score: {0}'.format(regr.score(X_test, y_test)))
def test_ElasticNet_alpha_rho(*data):
    '''
    test score with different alpha and l1_ratio
    :param data: train_data, test_data, train_value, test_value
    :return: None
    '''
    X_train, X_test, y_train, y_test = data
    alphas = np.logspace(-2, 2)
    rhos = np.linspace(0.01, 1)
    scores = []
    for alpha in alphas:
        for rho in rhos:
            regr = linear_model.ElasticNet(alpha=alpha, l1_ratio=rho)
            regr.fit(X_train, y_train)
            scores.append(regr.score(X_test, y_test))
    ## graph
    # BUG FIX: scores were appended alpha-major (rho fastest), i.e. they
    # reshape to (len(alphas), len(rhos)).  np.meshgrid(alphas, rhos)
    # returns (len(rhos), len(alphas)) grids with alpha varying along axis 1,
    # so the score matrix must be transposed to line up with the grid; the
    # original reshaped without transposing and plotted z-values against the
    # wrong (alpha, rho) pairs.
    score_grid = np.array(scores).reshape(len(alphas), len(rhos)).T
    alphas, rhos = np.meshgrid(alphas, rhos)
    from mpl_toolkits.mplot3d import Axes3D  # this part works well in py3
    from matplotlib import cm
    fig = plt.figure()
    ax = Axes3D(fig)
    surf = ax.plot_surface(alphas, rhos, score_grid, rstride=1, cstride=1,
                           cmap=cm.jet, linewidth=0, antialiased=False)
    fig.colorbar(surf, shrink=0.5, aspect=5)
    ax.set_xlabel(r"$\alpha$")
    ax.set_ylabel(r"$\rho$")
    ax.set_zlabel("score")
    ax.set_title("ElasticNet")
    plt.show()
def test_elasticnet_convergence(self):
    # Check that the SGD output is consistent with coordinate descent
    n_samples, n_features = 1000, 5
    rng = np.random.RandomState(0)
    X = np.random.randn(n_samples, n_features)

    # Ground-truth linear model generating y from X; both estimators should
    # approach it as the regularizer goes to zero.
    ground_truth_coef = rng.randn(n_features)
    y = np.dot(X, ground_truth_coef)

    # XXX: alpha = 0.1 seems to cause convergence problems
    for alpha in [0.01, 0.001]:
        for l1_ratio in [0.5, 0.8, 1.0]:
            coord_descent = linear_model.ElasticNet(
                alpha=alpha, l1_ratio=l1_ratio, fit_intercept=False)
            coord_descent.fit(X, y)

            sgd_model = self.factory(penalty='elasticnet', n_iter=50,
                                     alpha=alpha, l1_ratio=l1_ratio,
                                     fit_intercept=False)
            sgd_model.fit(X, y)

            err_msg = ("cd and sgd did not converge to comparable "
                       "results for alpha=%f and l1_ratio=%f"
                       % (alpha, l1_ratio))
            assert_almost_equal(coord_descent.coef_, sgd_model.coef_,
                                decimal=2, err_msg=err_msg)
def submit(self): """""" ## retrain with the whole training data self.TrainData = self.TrainData[(self.TrainData['logerror'] > self._low) & (self.TrainData['logerror'] < self._up)] self.TrainData['longitude'] -= -118600000 self.TrainData['latitude'] -= 34220000 X = self.TrainData.drop(self._l_drop_cols, axis=1) Y = self.TrainData['logerror'] X = X.values.astype(np.float32, copy=False) en = ElasticNet(alpha= self._alpha, l1_ratio = self._ratio, max_iter= self._iter, tol= 1e-4, selection= self._sel, random_state= 2017) self._model = en.fit(X, Y) del self.TrainData, X, Y gc.collect() self.TestData = self._data.LoadFromHdfFile(self.InputDir, 'test') #self.TestData = self.TestData.sample(frac = 0.01) self._sub = pd.DataFrame(index=self.TestData.index) self._sub['ParcelId'] = self.TestData['parcelid'] self.TestData['longitude'] -= -118600000 self.TestData['latitude'] -= 34220000 N = 200000 start = time.time() for d in self._l_test_predict_columns: s0 = time.time() print('Prediction for column %s ' % d) l_test_columns = ['%s%s' % (c, d) if (c in ['lastgap', 'monthyear', 'buildingage']) else c for c in self._l_train_columns] x_test = self.TestData[l_test_columns] for idx in range(0, len(x_test), N): x_test_block = x_test[idx:idx + N].values.astype(np.float32, copy=False) ret = self._model.predict(x_test_block)# * 0.99 + 0.011 * 0.01 self._sub.loc[x_test[idx:idx + N].index, d] = ret print(np.mean(np.abs(ret))) e0 = time.time() print('Prediction for column %s is done. time elapsed %ds' % (d, (e0 - s0))) ## clean del self.TestData gc.collect() end = time.time() print('Prediction is done. time elapsed %ds' % (end - start)) if (os.path.exists(self.OutputDir) == False): os.makedirs(self.OutputDir) self._sub.to_csv( '{0}/{1}_{2}.csv'.format(self.OutputDir, self.__class__.__name__, datetime.now().strftime('%Y%m%d-%H:%M:%S')), index=False, float_format='%.4f')
def build(self, dataset):
    """Select and fit the best estimator for the dataset via grid search.

    A categorizable target gets an SVC grid; otherwise ElasticNet and SVR
    grids compete.  Stores the winner in self.model and its best CV score
    in self.model_score.
    """
    evaluators = []
    cv = 5  # todo: have to adjust to dataset size
    if self.field_manager.target.is_categorizable():
        parameter_candidates = [
            {"kernel": ["linear"], "C": [1, 10, 100]},
            {"kernel": ["rbf"], "gamma": [1e-1, 1e-2, 1e-3, 1e-4],
             "C": [1, 10, 100]}
        ]
        # todo: have to think about scoring parameter (default is accuracy, so f1 related score may be appropriate)
        evaluator = GridSearchCV(
            SVC(C=1), parameter_candidates, cv=cv
        )
        evaluators.append(evaluator)
    else:
        evaluator1 = GridSearchCV(
            linear_model.ElasticNet(),
            {"alpha": [0.1, 0.5, 0.7, 1],
             "l1_ratio": [(r + 1) / 10 for r in range(10)]},
            cv=cv
        )
        parameter_candidates = [
            {"kernel": ["rbf"], "gamma": [1e-3, 1e-4], "C": [1, 10, 100]}
        ]
        # todo: have to think about scoring parameter (default is accuracy, so f1 related score may be appropriate)
        evaluator2 = GridSearchCV(
            SVR(C=1), parameter_candidates, cv=cv
        )
        evaluators.append(evaluator1)
        evaluators.append(evaluator2)

    # BUG FIX: start from -inf, not 0 — regression R^2 can be negative, in
    # which case no candidate ever beat 0 and self.model stayed None.
    self.model_score = float("-inf")
    self.model = None
    for e in evaluators:
        e.fit(dataset.data, dataset.target)
        if e.best_score_ > self.model_score:
            self.model_score = e.best_score_
            self.model = e.best_estimator_
def function(self):
    """Collect ElasticNet / ElasticNetCV hyper-parameters from the dialog.

    Returns (params, changed) where `changed` holds only the values that
    differ from the corresponding sklearn defaults.
    """
    p_attrib = {'False': False, 'True': True, 'Array-like': 'array-like'}
    r_attrib = {'None': None}
    # TODO Add back the random state later.
    # try:
    #     r_state = int(self.randomStateLineEdit.text())
    # except:
    #     r_state = r_attrib[self.randomStateLineEdit.text()]
    index = self.precomputeComboBox.currentIndex()
    precomputeComboBox = self.precomputeComboBox.itemText(index)
    if self.CVCheckBox.isChecked():
        params = {
            'l1_ratio': self.l1_ratioDoubleSpinBox.value(),
            'eps': self.epsDoubleSpinBox.value(),
            'n_alphas': self.n_alphasSpinBox.value(),
            'alphas': {'None': None}.get(self.alphasLineEdit.text()),
            'fit_intercept': self.fit_interceptCheckBox.isChecked(),
            'normalize': self.normalizeCheckBox.isChecked(),
            'precompute': self.precomputeComboBox.currentText(),
            'max_iter': self.max_iterSpinBox.value(),
            # BUG FIX: 'tol' previously read max_iterSpinBox (copy-paste);
            # it must come from the tolerance spinbox.
            'tol': self.tolDoubleSpinBox.value(),
            'cv': self.cVSpinBox.value(),
            'copy_X': self.copy_XCheckBox.isChecked(),
            'verbose': self.verboseCheckBox.isChecked(),
            'n_jobs': self.n_jobsSpinBox.value(),
            'positive': self.positiveCheckBox.isChecked(),
            'selection': self.selectionComboBox.currentText(),
            'CV': self.CVCheckBox.isChecked()}
        return params, self.getChangedValues(params, ElasticNetCV())
    else:
        params = {
            'alpha': self.alpha_text.value(),
            'l1_ratio': self.enl1_ratioDoubleSpinBox.value(),
            'fit_intercept': self.enfit_interceptCheckBox.isChecked(),
            'normalize': self.ennormalizeCheckBox.isChecked(),
            'precompute': self.enprecomputeCheckBox.isChecked(),
            'max_iter': self.enmax_iterSpinBox.value(),
            'copy_X': self.encopy_XCheckBox.isChecked(),
            'tol': self.entolDoubleSpinBox.value(),
            'warm_start': self.enwarm_startCheckBox.isChecked(),
            'positive': self.enpositiveCheckBox.isChecked(),
            # NOTE(review): this reads the CV panel's selection combo even in
            # the non-CV branch; if the plain panel has its own
            # enselectionComboBox, that may be the intended widget — confirm.
            'selection': self.selectionComboBox.currentText(),
            'CV': self.CVCheckBox.isChecked()}
        return params, self.getChangedValues(params, ElasticNet())
def build_model(train_file, test_file, attr_file, model_out, predictions_out,
                algorithm='ridge'):
    """Train the selected regressor, evaluate on a held-out test pickle, and
    persist both the per-row predictions and the fitted model.

    The attribute file's first entry names the target column; the rest are
    feature columns.  Raises NotImplementedError for unknown algorithms.
    """
    classifiers = ['ridge', 'linear', 'lasso', 'rf', 'en']
    if algorithm not in classifiers:
        raise NotImplementedError("only implemented algorithms: " + str(classifiers))

    train_data = pd.read_pickle(train_file)
    attrs = read_attrs(attr_file)
    target_attr = attrs[0]
    usable_attrs = attrs[1:]

    if algorithm == 'ridge':
        clf = Ridge()
    elif algorithm == 'linear':
        clf = LinearRegression()
    elif algorithm == 'lasso':
        clf = Lasso()
    elif algorithm == 'en':
        clf = ElasticNet()
    else:
        clf = RandomForestRegressor()

    clf.fit(train_data[usable_attrs], train_data[target_attr])

    test_data = pd.read_pickle(test_file)
    predictions = clf.predict(test_data[usable_attrs])
    errors = predictions - test_data[target_attr]

    prediction_results = test_data[[target_attr] + usable_attrs].copy()
    prediction_results['predicted'] = predictions
    prediction_results.to_pickle(predictions_out)

    # BUG FIX: the reporting below used Python 2 print *statements*, which
    # are a SyntaxError under Python 3; converted to print() calls with
    # identical output.
    print("Modeling '%s'" % target_attr)
    print(" Train:", train_file, '(%d examples)' % len(train_data))
    print(" Test:", test_file, '(%d examples)' % len(test_data))
    print("Algorithm:", algorithm)
    if hasattr(clf, 'coef_'):
        print('Coefficients:')
        for i, c in enumerate(clf.coef_):
            print(' %-20s' % usable_attrs[i] + ':', '%20.4f' % c)
    print('MSE : %10.4f' % np.mean(errors ** 2))
    print('medSE: %10.4f' % np.median(errors ** 2))
    print('SSE : %10.4f' % np.sum(errors ** 2))
    print('Variance score: %.4f' % clf.score(test_data[usable_attrs],
                                             test_data[target_attr]))

    pickle.dump(clf, open(model_out, 'wb'))
def getSKLearnModel(modelName):
    """Instantiate the scikit-learn estimator registered under modelName."""
    # Map each supported name to its estimator class; instantiation is
    # deferred so only the requested model is constructed.
    factories = {
        'LinearRegression': linear_model.LinearRegression,
        'BayesianRidge': linear_model.BayesianRidge,
        'ARDRegression': linear_model.ARDRegression,
        'ElasticNet': linear_model.ElasticNet,
        'HuberRegressor': linear_model.HuberRegressor,
        'Lasso': linear_model.Lasso,
        'LassoLars': linear_model.LassoLars,
        'Rigid': linear_model.Ridge,  # historical key name kept for callers
        'SGDRegressor': linear_model.SGDRegressor,
        'SVR': SVR,
        'MLPClassifier': MLPClassifier,
        'KNeighborsClassifier': KNeighborsClassifier,
        'SVC': SVC,
        'GaussianProcessClassifier': GaussianProcessClassifier,
        'DecisionTreeClassifier': DecisionTreeClassifier,
        'RandomForestClassifier': RandomForestClassifier,
        'AdaBoostClassifier': AdaBoostClassifier,
        'GaussianNB': GaussianNB,
        'LogisticRegression': linear_model.LogisticRegression,
        'QuadraticDiscriminantAnalysis': QuadraticDiscriminantAnalysis,
    }
    if modelName in factories:
        model = factories[modelName]()
    # An unknown name leaves `model` unbound, so the return raises
    # UnboundLocalError exactly as the original if/elif chain did.
    return model