def try_params(n_iterations, params):
    """Build an ``SGD`` model for one parameter set and evaluate it.

    Parameters
    ----------
    n_iterations : number
        Iteration budget; rounded to the nearest integer and passed to
        ``SGD`` as ``n_iter``.
    params : dict
        Keyword arguments for ``SGD`` plus a mandatory ``'scaler'`` entry.
        A truthy ``'scaler'`` is the name of a scaler class that is
        instantiated without arguments; a falsy value disables scaling.
        The dict itself is not modified.

    Returns
    -------
    The result of ``train_and_eval_sklearn_regressor(clf, local_data)``.
    """
    n_iterations = int(round(n_iterations))
    # A single pre-formatted argument prints the same text under the
    # Python 2 print statement and the Python 3 print function.
    print("n_iterations: {}".format(n_iterations))
    pprint(params)

    if params['scaler']:
        # NOTE(review): eval() runs arbitrary code. This is only safe while
        # params['scaler'] comes from a trusted, hard-coded search space.
        scaler = eval("{}()".format(params['scaler']))
        x_train_ = scaler.fit_transform(data['x_train'].astype(float))
        x_test_ = scaler.transform(data['x_test'].astype(float))

        local_data = {
            'x_train': x_train_,
            'y_train': data['y_train'],
            'x_test': x_test_,
            'y_test': data['y_test'],
        }
    else:
        local_data = data

    # we need a copy because at the next small round the best params will be re-used
    params_ = dict(params)
    params_.pop('scaler')

    clf = SGD(n_iter=n_iterations, **params_)
    return train_and_eval_sklearn_regressor(clf, local_data)
def psgd_method(args):
    """
    Fit a single SGD model; worker function meant to be driven by map.

    Parameters
    ----------
    args: tuple
        (sgd, data), where sgd is the estimator to train and data is
        itself a pair (X_train, y_train)

    Returns
    -------
    sgd: the same estimator object, after its .fit() call
    """
    model, (features, targets) = args
    model.fit(features, targets)
    return model
def psgd_method_1(sgd, X_train, y_train):
    """
    Fit a single SGD model; worker function taking its inputs as
    separate arguments.

    Parameters
    ----------
    sgd: estimator object exposing .fit(X, y)
    X_train: train input data
    y_train: train target data

    Returns
    -------
    sgd: the same estimator object, after its .fit() call
    """
    estimator = sgd
    estimator.fit(X_train, y_train)
    return estimator
def psgd_method_2(sgd, loop_iter, coef, intercept, X_train, y_train):
    """
    Run loop_iter consecutive fits on one model, feeding each fit the
    coefficients produced by the previous one.

    Parameters
    ----------
    sgd: estimator object exposing .fit(X, y), .coef_ and .intercept_
    loop_iter: number of .fit() calls to perform
    coef: value assigned to sgd.coef_ before the first fit
    intercept: value assigned to sgd.intercept_ before the first fit
    X_train: train input data
    y_train: train target data

    Returns
    -------
    sgd: the same estimator object (untouched when loop_iter is 0)
    """
    # NOTE(review): coef/intercept are only rebound to local names here;
    # this function never assigns into the caller's objects.
    weights, bias = coef, intercept
    for _step in range(loop_iter):
        sgd.coef_, sgd.intercept_ = weights, bias
        sgd.fit(X_train, y_train)
        weights, bias = sgd.coef_, sgd.intercept_
    return sgd
def psgd_1(sgd, n_iter_per_job, n_jobs, X_train, y_train):
    """
    Parallel SGD with a single synchronisation step.

    n_jobs fresh SGDRegressor(n_iter=n_iter_per_job) workers are each
    fitted on the full training data through Parallel, and their
    coefficients and intercepts are averaged once at the end. The workers
    are newly created models; only coef_ and intercept_ of the input sgd
    are written.

    Parameters
    ----------
    sgd: input SGDRegressor() object that receives the averaged result
    n_iter_per_job: number of iterations per worker
    n_jobs: number of parallel workers to run
    X_train: train input data
    y_train: train target data

    Returns
    -------
    sgd: the input object with updated coef_ and intercept_
    """
    workers = [SGDRegressor(n_iter=n_iter_per_job) for _ in range(n_jobs)]
    fitted = Parallel(n_jobs=n_jobs)(
        delayed(psgd_method_1)(worker, X_train, y_train)
        for worker in workers)

    stacked_coefs = np.array([model.coef_ for model in fitted])
    stacked_intercepts = np.array([model.intercept_ for model in fitted])
    sgd.coef_ = stacked_coefs.mean(axis=0)
    sgd.intercept_ = stacked_intercepts.mean(axis=0)
    return sgd
def define_model(self):
    """Instantiate the estimator selected by ``self.modeltype``.

    Hyper-parameters are read from ``self.parameters`` only for the model
    types that need them.

    Raises
    ------
    ConfigError
        If ``self.modeltype`` is not one of the supported names.
    """
    # Disabled in the original source (kept for reference): "AR",
    # 'StaticModel', 'AdvancedStaticModel' and 'SGDRegressor' branches,
    # plus a RandomForestClassifier alternative for "RandomForest".
    kind = self.modeltype

    if kind == "RandomForest":
        return ensemble.RandomForestRegressor(
            n_estimators=self.parameters['n_estimators'])

    if kind == "LinearRegression":
        return linear_model.LinearRegression()

    if kind == "Lasso":
        return linear_model.Lasso(alpha=self.parameters['alpha'])

    if kind == "ElasticNet":
        return linear_model.ElasticNet(
            alpha=self.parameters['alpha'],
            l1_ratio=self.parameters['l1_ratio'])

    if kind == "SVR":
        return SVR(
            C=self.parameters['C'],
            epsilon=self.parameters['epsilon'],
            kernel=self.parameters['kernel'])

    raise ConfigError("Unsupported model {0}".format(self.modeltype))
def test_basic(self, single_chunk_regression):
    """``lm.PartialSGDRegressor.fit`` and ``lm_.SGDRegressor.partial_fit``
    must yield equal estimators when configured identically."""
    X, y = single_chunk_regression
    shared_kwargs = dict(random_state=0, max_iter=1000, tol=1e-3)

    wrapped = lm.PartialSGDRegressor(**shared_kwargs)
    reference = lm_.SGDRegressor(**shared_kwargs)

    wrapped.fit(X, y)
    reference.partial_fit(X, y)

    assert_estimator_equal(wrapped, reference)
def SGD_regression_test_error(X, y, X_test, y_test, delta, SGD_epochs):
    """Return the relative test error, in percent, of an SGD linear fit.

    The training targets are centred on their mean, an intercept-free
    ``SGDRegressor(alpha=delta, n_iter=SGD_epochs)`` is fitted, and the
    mean is added back to the test predictions. The result is
    ``100 * ||y_hat_test - y_test|| / ||y_test||``.
    """
    # center training targets
    target_offset = np.mean(y)
    centered_targets = y - target_offset

    # solve primal problem
    regressor = linear_model.SGDRegressor(
        alpha=delta, fit_intercept=False, n_iter=SGD_epochs)
    regressor.fit(X, centered_targets)

    predictions = target_offset + X_test.dot(regressor.coef_)
    residual_norm = np.linalg.norm(predictions - y_test)
    return 100.0 * residual_norm / np.linalg.norm(y_test)


# BINARY SEARCH KERNEL WIDTH
def demo():
    """Fit ELM / ELM_OI models on the XOR toy dataset and print their
    predictions next to the targets."""
    import sys
    sys.path.append('../core')
    from tools import make_XOR_dataset
    from BR import BR

    set_printoptions(precision=3, suppress=True)

    X, Y = make_XOR_dataset()
    # Not used below; the unpacking only succeeds when Y.shape has exactly
    # two entries.
    _n_rows, _n_cols = Y.shape

    print("CLASSIFICATION")
    base_classifier = linear_model.SGDClassifier(n_iter=100)
    classifier_net = ELM(8, f=tanh, h=BR(-1, base_classifier))
    classifier_net.fit(X, Y)
    # test it
    print(classifier_net.predict(X))
    print("vs")
    print(Y)

    print("REGRESSION")
    regressor_net = ELM(100, h=linear_model.LinearRegression())
    regressor_net.fit(X, Y)
    print(Y)
    print(regressor_net.predict(X))

    print("REGRESSION OI")
    oi_net = ELM_OI(100, h=BR(-1, h=linear_model.SGDRegressor()))
    oi_net.fit(X, Y)
    print(Y)
    print(oi_net.predict(X))
def __init__(self):
    """Create one SGDRegressor per action of ``env.action_space``.

    The models are stored, in action order, in ``self.actions``. Each one
    receives a single ``partial_fit`` call on a featurized reset state with
    target 0.
    """
    # We create a separate model for each action in the environment's
    # action space. Alternatively we could somehow encode the action
    # into the features, but this way it's easier to code up.
    self.actions = []
    # range() instead of the Python 2-only xrange(): same iteration, and
    # the code no longer raises NameError on Python 3.
    for _ in range(env.action_space.n):
        act = SGDRegressor(learning_rate="constant")
        # We need to call partial_fit once to initialize the model
        # or we get a NotFittedError when trying to make a prediction
        # This is quite hacky.
        act.partial_fit([self.featurize_state(env.reset())], [0])
        self.actions.append(act)
def train_sgd_regressor():
    """Return the ``(ModelProperties, estimator)`` pair for an SGD regressor.

    The properties are created with ``regression=True, online=True``; the
    estimator is a default-configured ``linear_model.SGDRegressor``.
    """
    # Picking model
    properties = mp.ModelProperties(regression=True, online=True)
    estimator = linear_model.SGDRegressor()
    return properties, estimator


# http://scikit-learn.org/stable/modules/generated/sklearn.linear_model.PassiveAggressiveRegressor.html#sklearn.linear_model.PassiveAggressiveRegressor
def adbPredictor(df):
    """Fit an AdaBoostRegressor, plot its predictions and return them.

    The four values returned by ``sample(df)`` are used as train inputs,
    train targets, test inputs and test targets. Predictions for the test
    inputs are scatter-plotted against the test targets, the first element
    of ``pearsonr``'s result is printed, and the plot is shown.

    Returns
    -------
    The predictions reshaped to ``(predicted.size, 1)``.
    """
    train_x, train_y, test_x, test_y = sample(df)

    # clf = linear_model.SGDRegressor()
    booster = ensemble.AdaBoostRegressor()
    booster.fit(train_x, train_y)
    predictions = booster.predict(test_x)

    _figure, axes = plotter.subplots()
    axes.set_ylabel('Predicted KNN Weekly')
    axes.scatter(test_y, predictions)
    axes.set_xlabel('Measured')

    predictions = np.reshape(predictions, (predictions.size, 1))
    correlation = pearsonr(test_y, predictions)
    print(correlation[0])
    plotter.show()
    return predictions
def __init__(self):
    """Initialise the OutputThing base with the 'train', 'observe' and
    'predict' ports and create the SGD regressor held in ``self.clf``."""
    port_names = ['train', 'observe', 'predict']
    OutputThing.__init__(self, ports=port_names)
    self.clf = linear_model.SGDRegressor()
def __init__(self):
    """Initialise the Publisher base with the 'train', 'observe' and
    'predict' topics and create the SGD regressor held in ``self.clf``."""
    topic_names = ['train', 'observe', 'predict']
    Publisher.__init__(self, topics=topic_names)
    self.clf = linear_model.SGDRegressor()
def get_classifier(self, X, Y):
    """
    Create a default-configured SGDRegressor and fit it.

    :param X: training inputs passed to fit
    :param Y: training targets passed to fit
    :return: the SGDRegressor instance after fit was called on it
    """
    regressor = SGDRegressor()
    regressor.fit(X, Y)
    return regressor
def reset_args(self):
    """Re-run ``SGDRegressor.__init__`` on this object with its stored settings.

    ``alpha``, ``penalty`` and every entry of ``self.kwargs`` are forwarded;
    ``n_iter`` is set to ``self.n_iter_per_step``. Asserts that
    ``self.max_iter`` is a whole multiple of ``self.n_iter_per_step``.
    """
    steps_divide_evenly = (self.max_iter % self.n_iter_per_step == 0)
    assert steps_divide_evenly

    base_init = linear_model.SGDRegressor.__init__
    base_init(
        self,
        alpha=self.alpha,
        penalty=self.penalty,
        n_iter=self.n_iter_per_step,
        **self.kwargs)
def fit(self,X,y):
    """
    Train in stages and keep a snapshot of the model after each stage.

    One ``SGDRegressor.fit`` call is made per step of
    ``range(0, self.max_iter, self.n_iter)``. The first call starts from
    scratch; every later call is seeded with the current ``coef_`` and
    ``intercept_`` through ``coef_init`` / ``intercept_init``.

    After each call a tuple ``(steps_done, coef copy, intercept copy)`` is
    appended to ``self.stages_`` and progress is logged. Nothing is
    returned explicitly.
    """
    # Reset any state left over from a previous fit.
    self.coef_ = None
    self.intercept_ = None
    self.stages_ = []
    for i in range(0,self.max_iter,self.n_iter):
        if self.coef_ is not None:
            # Later stages: continue from the coefficients found so far.
            assert(self.intercept_ is not None)
            linear_model.SGDRegressor.fit(self,X,y,coef_init=self.coef_,intercept_init=self.intercept_)
        else:
            # First stage: nothing to warm-start from yet.
            linear_model.SGDRegressor.fit(self,X,y)
        # record coefs and intercept for later
        self.stages_.append((i+self.n_iter,self.coef_.copy(),self.intercept_.copy()))
        logging.info('done %d/%d steps' % (i+self.n_iter,self.max_iter))
        # NOTE(review): the source layout was flattened; this AUC log is
        # assumed to run once per stage rather than once after the loop
        # -- confirm against the original file.
        logging.info('training set auc %f' % self.auc(X,y))
def predict(self, X, coef=None, intercept=None):
    """
    Predict with SGDRegressor.predict and pass the result through
    scale_predictions (per the original docstring, this scales the
    predictions into 0..1).

    When coef is given, intercept must be given too; both are stored on
    the instance (self.intercept_, self.coef_) before predicting, so they
    remain in effect for later calls.
    """
    override_requested = coef is not None
    if override_requested:
        assert intercept is not None
        self.intercept_ = intercept
        self.coef_ = coef

    raw_predictions = linear_model.SGDRegressor.predict(self, X)
    return scale_predictions(raw_predictions)
def buildNet(self):
    """Return a new SGDRegressor configured with ``n_iter=1``,
    ``alpha=0.0001`` and ``shuffle=False``."""
    settings = dict(n_iter=1, alpha=0.0001, shuffle=False)
    return linear_model.SGDRegressor(**settings)
def getModels():
    """Return a new list with the supported model names, in a fixed order."""
    first_half = [
        "LinearRegression",
        "BayesianRidge",
        "ARDRegression",
        "ElasticNet",
        "HuberRegressor",
        "Lasso",
        "LassoLars",
        "Rigid",
        "SGDRegressor",
        "SVR",
    ]
    second_half = [
        "MLPClassifier",
        "KNeighborsClassifier",
        "SVC",
        "GaussianProcessClassifier",
        "DecisionTreeClassifier",
        "RandomForestClassifier",
        "AdaBoostClassifier",
        "GaussianNB",
        "LogisticRegression",
        "QuadraticDiscriminantAnalysis",
    ]
    return first_half + second_half
def __getLearner(self):
    """Build an SGDRegressor from this object's ``loss``, ``penalty``,
    ``alpha``, ``l1_ratio`` and ``intercept`` attributes (the last one is
    passed as ``fit_intercept``)."""
    options = {
        'loss': self.loss,
        'penalty': self.penalty,
        'alpha': self.alpha,
        'l1_ratio': self.l1_ratio,
        'fit_intercept': self.intercept,
    }
    return SGDRegressor(**options)
def __init__(self, T, L, backup):
    """Store the settings and create the estimator(s) for the backup type.

    Parameters
    ----------
    T, L
        Stored unchanged as ``self.T`` and ``self.L``.
    backup : mapping
        Stored as ``self.backup``. Its ``'name'`` entry selects what is
        created:

        * ``'sampling'``      -> ``self.Q``
        * ``'doubleQ'``       -> ``self.Q_1`` and ``self.Q_2``
        * ``'replay buffer'`` -> ``self.Q`` and an empty ``self.buff`` list

        Any other name only prints "Illegal Backup Type"; no estimator
        attribute is created in that case.
    """
    self.backup = backup
    self.T = T
    self.L = L
    self.pre_process = PolynomialFeatures(degree=2, include_bias=False)

    def new_q():
        # Every backup type uses the same estimator configuration; building
        # it in one place keeps the variants from drifting apart.
        return linear_model.SGDRegressor(
            loss='huber', penalty='l2', learning_rate='invscaling',
            eta0=0.1, power_t=0.25, warm_start=False)

    backup_name = self.backup['name']
    if backup_name == 'sampling':
        self.Q = new_q()
    elif backup_name == 'doubleQ':
        self.Q_1 = new_q()
        self.Q_2 = new_q()
    elif backup_name == 'replay buffer':
        self.Q = new_q()
        self.buff = []
    else:
        # Parenthesised single argument: valid syntax and identical output
        # on both Python 2 and Python 3.
        print("Illegal Backup Type")
def parallel_sgd(pool, sgd, n_iter, n_jobs, n_sync, data):
    """
    High level parallelization of SGDRegressor.

    n_jobs warm-started workers are trained through pool.map. After each
    of the n_sync rounds their coefficients and intercepts are averaged
    and written back to every worker, so the next round starts from the
    shared average.

    Parameters
    ----------
    pool: multiprocessor pool to use for this parallelization
    sgd: SGDRegressor instance whose coef and intercept need to be updated;
        only its eta0 is read
    n_iter: number of iterations per worker
    n_jobs: number of parallel workers
    n_sync: number of synchronization steps. Syncs are spread evenly
        through out the iterations; must be at least 1
    data: list of (X, y) data for the workers. This list should have
        n_jobs elements

    Returns
    -------
    sgd: SGDRegressor instance with updated coef and intercept
    """
    # eta = sgd.eta0*n_jobs
    eta = sgd.eta0
    # Iterations per model between syncs. Floor division keeps this an
    # integer on Python 3 as well, where "/" would hand a float iteration
    # count to SGDRegressor.
    n_iter_sync = n_iter // n_sync
    sgds = [SGDRegressor(warm_start=True, n_iter=n_iter_sync, eta0=eta)
            for _ in range(n_jobs)]

    for _ in range(n_sync):
        args = zip(sgds, data)
        sgds = pool.map(psgd_method, args)
        coef = np.array([x.coef_ for x in sgds]).mean(axis=0)
        intercept = np.array([x.intercept_ for x in sgds]).mean(axis=0)
        # Sync step: every worker continues from the averaged model.
        for s in sgds:
            s.coef_ = coef
            s.intercept_ = intercept

    sgd.coef_ = coef
    sgd.intercept_ = intercept

    return sgd
def psgd_3(sgd, n_iter_per_job, n_jobs, n_syncs, X_train, y_train):
    """
    Parallel SGD implementation using Parallel/delayed workers.

    All workers sync n_syncs times while running SGD independently for
    n_iter_per_job iterations in total. Each worker uses an increased
    learning rate: sgd.eta0 multiplied by n_jobs.

    Parameters
    ----------
    sgd: input SGDRegression() object; only its eta0 is read
    n_iter_per_job: number of iterations per worker
    n_jobs: number of parallel processes to run
    n_syncs: number of syncs; must be at least 1
    X_train: train input data
    y_train: train target data

    Returns
    -------
    sgd: the input SGDRegressor() object with updated coef_ and intercept_
    """
    # Iterations per model between syncs. Floor division keeps this an
    # integer on Python 3 as well, where "/" would hand a float iteration
    # count to SGDRegressor.
    n_iter_sync = n_iter_per_job // n_syncs
    eta = sgd.eta0 * n_jobs

    sgds = [SGDRegressor(warm_start=True, n_iter=n_iter_sync, eta0=eta)
            for _ in range(n_jobs)]

    for _ in range(n_syncs):
        sgds = Parallel(n_jobs=n_jobs)(
            delayed(psgd_method_1)(s, X_train, y_train) for s in sgds)
        coef = np.array([x.coef_ for x in sgds]).mean(axis=0)
        intercept = np.array([x.intercept_ for x in sgds]).mean(axis=0)
        # Sync step: every worker continues from the averaged model.
        for s in sgds:
            s.coef_ = coef
            s.intercept_ = intercept

    sgd.coef_ = coef
    sgd.intercept_ = intercept

    return sgd
def psgd_4(sgd, n_iter_per_job, n_jobs, X_train, y_train, coef, intercept):
    """
    Parallel SGD implementation using the "threading" backend.

    n_jobs warm-started single-iteration workers each run psgd_method_2
    for n_iter_per_job rounds, all starting from the same coef and
    intercept objects. The workers' final coefficients and intercepts are
    averaged into sgd.

    NOTE(review): the original description says workers overwrite the
    shared coef/intercept; whether that actually happens depends on
    psgd_method_2 and on fit() -- confirm.

    Parameters
    ----------
    sgd: input SGDRegression() object that receives the averaged result
    n_iter_per_job: number of iterations per worker
    n_jobs: number of parallel workers to run
    X_train: train input data
    y_train: train target data
    coef: initial coefs handed to every worker
    intercept: initial intercept handed to every worker

    Returns
    -------
    sgd: the input SGDRegressor() object with updated coef_ and intercept_
    """
    workers = [SGDRegressor(warm_start=True, n_iter=1)
               for _ in range(n_jobs)]

    run_threaded = Parallel(n_jobs=n_jobs, backend="threading")
    fitted = run_threaded(
        delayed(psgd_method_2)(
            worker, n_iter_per_job, coef, intercept, X_train, y_train)
        for worker in workers)

    stacked_coefs = np.array([model.coef_ for model in fitted])
    stacked_intercepts = np.array([model.intercept_ for model in fitted])
    sgd.coef_ = stacked_coefs.mean(axis=0)
    sgd.intercept_ = stacked_intercepts.mean(axis=0)
    return sgd
def __init__(self, mode='sequence', random_state=1234):
    """Constructor.

    Params
    ------
    mode : str
        Values: 'sequence', 'rnafold' or 'rnaplfold'. Selects the
        preprocessor and vote aggregator.

    random_state : int (default : 1234)
        Seed for random number generator.

    Raises
    ------
    Exception
        If mode is not one of the recognised values.
    """
    self.mode = mode
    self.max_dist = None
    self.preprocessor_args = dict()
    self.vectorizer_args = dict()
    self.regressor_args = dict()
    self.smoothing_args = dict()

    if mode == 'sequence':
        self.preprocessor = seq.sequence_preprocessor
        self.vote_aggregator = seq.vote_aggregator
    elif mode == 'rnafold':
        self.preprocessor = graph.rnafold_preprocessor
        self.vote_aggregator = graph.vote_aggregator
    elif mode == 'rnaplfold':
        self.preprocessor = graph.rnaplfold_preprocessor
        self.vote_aggregator = graph.vote_aggregator
    else:
        # The one-element tuple keeps the formatting correct even when
        # mode itself is a tuple. The exit(1) that used to follow this
        # raise could never run and has been dropped.
        raise Exception("Unrecognized mode: %s" % (mode,))

    self.regressor = SGDRegressor(shuffle=True, random_state=random_state)

    # status variables
    self.is_optimized = False
    self.is_fitted = False
def getSKLearnModel(modelName):
    """Return a new, default-configured estimator for ``modelName``.

    Parameters
    ----------
    modelName : str
        One of the names listed in the table below.

    Returns
    -------
    A freshly constructed estimator instance.

    Raises
    ------
    ValueError
        If ``modelName`` is not a supported name. (Previously an unknown
        name fell through every branch and surfaced as an
        UnboundLocalError on the return statement.)
    """
    # Lambdas keep construction lazy: only the selected estimator's class
    # name is looked up, exactly as with the former if/elif chain.
    factories = {
        'LinearRegression': lambda: linear_model.LinearRegression(),
        'BayesianRidge': lambda: linear_model.BayesianRidge(),
        'ARDRegression': lambda: linear_model.ARDRegression(),
        'ElasticNet': lambda: linear_model.ElasticNet(),
        'HuberRegressor': lambda: linear_model.HuberRegressor(),
        'Lasso': lambda: linear_model.Lasso(),
        'LassoLars': lambda: linear_model.LassoLars(),
        # 'Rigid' is the spelling callers use for Ridge; kept as-is.
        'Rigid': lambda: linear_model.Ridge(),
        'SGDRegressor': lambda: linear_model.SGDRegressor(),
        'SVR': lambda: SVR(),
        'MLPClassifier': lambda: MLPClassifier(),
        'KNeighborsClassifier': lambda: KNeighborsClassifier(),
        'SVC': lambda: SVC(),
        'GaussianProcessClassifier': lambda: GaussianProcessClassifier(),
        'DecisionTreeClassifier': lambda: DecisionTreeClassifier(),
        'RandomForestClassifier': lambda: RandomForestClassifier(),
        'AdaBoostClassifier': lambda: AdaBoostClassifier(),
        'GaussianNB': lambda: GaussianNB(),
        'LogisticRegression': lambda: linear_model.LogisticRegression(),
        'QuadraticDiscriminantAnalysis':
            lambda: QuadraticDiscriminantAnalysis(),
    }

    try:
        factory = factories[modelName]
    except (KeyError, TypeError):
        # KeyError: unknown name; TypeError: unhashable argument.
        factory = None

    if factory is None:
        raise ValueError(
            "Unsupported model name: {!r}".format(modelName))
    return factory()
def get_model_list(task_name):
    """Return the regression models to compare and their short names.

    Parameters
    ----------
    task_name
        Not used by this function; kept for interface compatibility.

    Returns
    -------
    (model_list, name_list) : tuple of two lists
        ``name_list[i]`` is the label of ``model_list[i]``.
    """
    # Models and names are declared as pairs so the two returned lists
    # cannot drift apart. Previously the SGDRegressor model was commented
    # out while its 'LR_SGD' name was still appended, which shifted every
    # later name by one position relative to its model.
    named_models = [
        ('LR', linear_model.LinearRegression()),
        # ('LR_SGD', linear_model.SGDRegressor()),
        ('Lasso', linear_model.Lasso(alpha=1.0)),
        ('Ridge', linear_model.Ridge(alpha=1.0)),
        ('LassoLars', linear_model.LassoLars(alpha=.1)),
        ('BayesianRidge', linear_model.BayesianRidge()),
        ('KernelRidge', KernelRidge(alpha=1.0)),
        ('GaussianProcess',
         gaussian_process.GaussianProcess(
             theta0=1e-2, thetaL=1e-4, thetaU=1e-1)),
        ('KNN_unif', KNeighborsRegressor(weights='uniform', n_neighbors=3)),
        ('KNN_dist', KNeighborsRegressor(weights='distance', n_neighbors=3)),
        ('SVM_linear',
         SVR(kernel='linear', C=1, gamma='auto', coef0=0, degree=2)),
        ('SVM_poly',
         SVR(kernel='poly', C=1, gamma='auto', coef0=0, degree=2)),
        ('SVM_rbf',
         SVR(kernel='rbf', C=1, gamma='auto', coef0=0, degree=2)),
        ('DT', DecisionTreeRegressor()),
        ('RF',
         RandomForestRegressor(
             n_estimators=100, max_depth=None, min_samples_split=2,
             random_state=0)),
        ('ET',
         ExtraTreesRegressor(
             n_estimators=100, max_depth=None, max_features='auto',
             min_samples_split=2, random_state=0)),
    ]

    model_list = [model for _name, model in named_models]
    name_list = [name for name, _model in named_models]
    return model_list, name_list