The following 14 code examples, extracted from open-source Python projects, illustrate how to use sklearn.linear_model.LogisticRegressionCV().
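Before the project snippets, here is a minimal, self-contained usage sketch of the shared pattern; the synthetic dataset and the cv=5 setting are illustrative assumptions, not taken from any of the projects below.

from sklearn.datasets import make_classification
from sklearn.linear_model import LogisticRegressionCV

# Toy binary-classification data, purely for illustration (assumed).
X, y = make_classification(n_samples=200, n_features=10, random_state=0)

# Cross-validate the regularization strength over the default grid of
# 10 C values, using 5 folds, then refit on the full training set.
clf = LogisticRegressionCV(cv=5).fit(X, y)

print(clf.C_)           # best inverse regularization strength per class
print(clf.score(X, y))  # mean accuracy on the training data

The same construct-fit-predict pattern underlies each example that follows; they differ mainly in the penalty, solver, scoring, and cv arguments they pass.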
def get_model():
    if FLAGS.model == 'logistic':
        return linear_model.LogisticRegressionCV(
            class_weight='balanced', scoring='roc_auc',
            n_jobs=FLAGS.n_jobs, max_iter=10000, verbose=1)
    elif FLAGS.model == 'random_forest':
        return ensemble.RandomForestClassifier(
            n_estimators=100, n_jobs=FLAGS.n_jobs,
            class_weight='balanced', verbose=1)
    elif FLAGS.model == 'svm':
        # Note: the sklearn.grid_search module was renamed to
        # sklearn.model_selection in scikit-learn 0.18 and later removed.
        return grid_search.GridSearchCV(
            estimator=svm.SVC(kernel='rbf', gamma='auto',
                              class_weight='balanced'),
            param_grid={'C': np.logspace(-4, 4, 10)},
            scoring='roc_auc', n_jobs=FLAGS.n_jobs, verbose=1)
    else:
        raise ValueError('Unrecognized model %s' % FLAGS.model)
def init_clf(clf_used, params=None):
    if params is not None:
        params_used = params
    elif clf_used == 'svm':
        params_used = svm_params
    elif clf_used == 'ada_boost':
        params_used = rf_params
    elif clf_used == 'lr':
        params_used = lr_params
    else:
        params_used = rf_params

    if clf_used == 'svm':
        clf = SVC(**params_used)
    elif clf_used == 'ada_boost':
        rf = RandomForestClassifier(**rf_params)
        clf = AdaBoostClassifier(base_estimator=rf, **params_used)
    elif clf_used == 'lr':
        clf = LogisticRegressionCV(**params_used)
    else:
        clf = RandomForestClassifier(**params_used)
    return clf
def train_lr(densities_pos, densities_neg, uncerts_pos, uncerts_neg):
    """
    TODO
    :param densities_pos:
    :param densities_neg:
    :param uncerts_pos:
    :param uncerts_neg:
    :return:
    """
    values_neg = np.concatenate(
        (densities_neg.reshape((1, -1)),
         uncerts_neg.reshape((1, -1))),
        axis=0).transpose([1, 0])
    values_pos = np.concatenate(
        (densities_pos.reshape((1, -1)),
         uncerts_pos.reshape((1, -1))),
        axis=0).transpose([1, 0])
    values = np.concatenate((values_neg, values_pos))
    labels = np.concatenate(
        (np.zeros_like(densities_neg), np.ones_like(densities_pos)))
    lr = LogisticRegressionCV(n_jobs=-1).fit(values, labels)
    return values, labels, lr
def predict(self, X):
    self.sents_test = X
    self.sents_all = self.sents_train + self.sents_test
    if self.sents_shuffle:
        # list(...) is needed so the indices can be shuffled and assigned to
        s_indexs = list(range(len(self.sents_all)))
        random.shuffle(s_indexs)
        s_invers_indexs = list(range(len(s_indexs)))
        for n in range(len(s_indexs)):
            s_invers_indexs[s_indexs[n]] = n
        sents_all = [self.sents_all[n] for n in s_indexs]
    else:
        sents_all = self.sents_all
    all_docs = list(LabeledListSentence(self.sents_all))
    self.doc2vec_set(all_docs)
    # print('size', self.doc2vec.vector_size)
    self.X_train = [self.doc2vec.infer_vector(s) for s in self.sents_train]
    self.X_test = [self.doc2vec.infer_vector(s) for s in self.sents_test]
    self.logistic = LogisticRegressionCV(class_weight='balanced')  # , n_jobs=-1
    self.logistic.fit(self.X_train, self.Y_train)
    Y_test_predict = self.logistic.predict(self.X_test)
    return Y_test_predict
def __init__(self, data, N_i, N_c, *args, **kwargs):
    """
    Fit a random forest model to a Dataset object.

    N_i, N_c: parameters defining allowed time windows. See the
    transform_X method.
    args, kwargs: passed to the RandomForestClassifier constructor.
    """
    Wrapper.__init__(self, data, N_i, N_c)
    kwargs['n_estimators'] = 128
    self.classifier = RandomForestClassifier(*args, **kwargs)
    self.classifier.fit(self.fit_X, self.fit_y)
def __init__(self, data, N_i, N_c, *args, **kwargs):
    """
    Fit a random forest model to a Dataset object.

    N_i, N_c: parameters defining allowed time windows. See the
    transform_X method.
    args, kwargs: passed to the RandomForestClassifier constructor.
    """
    Wrapper.__init__(self, data, N_i, N_c)
    kwargs['n_estimators'] = 1024
    self.classifier = RandomForestClassifier(*args, **kwargs)
    self.classifier.fit(self.fit_X, self.fit_y)
def __init__(self, data, N_i, N_c, *args, **kwargs):
    """
    Fit a random forest model to a Dataset object.

    N_i, N_c: parameters defining allowed time windows. See the
    transform_X method.
    args, kwargs: passed to the RandomForestClassifier constructor.
    """
    Wrapper.__init__(self, data, N_i, N_c)
    kwargs['n_estimators'] = 32768
    self.classifier = RandomForestClassifier(*args, **kwargs)
    self.classifier.fit(self.fit_X, self.fit_y)
def __init__(self, data, N_i, N_c, *args, **kwargs):
    """
    Fit a regularized logistic regression model to a Dataset object.

    By default, uses L1 regularization with the strength chosen from
    10 options spaced logarithmically between 1e-4 and 1e4 (the
    sklearn LogisticRegressionCV default) using min(10, data.n_subjects)
    folds of cross-validation, but other options may be chosen by
    specifying arguments to the LogisticRegressionCV constructor
    through *args and **kwargs.

    N_i, N_c: parameters defining allowed time windows. See the
    transform_X method.
    args, kwargs: passed to the LogisticRegressionCV constructor.
    """
    Wrapper.__init__(self, data, N_i, N_c)
    default_folds = min(10, data.n_subjects)
    default_classifier_arguments = {
        'cv': default_folds,
        'solver': 'liblinear',
        'penalty': 'l1',
    }
    # Update with the arguments passed in by the user, clobbering
    # the default settings if alternate values are provided.
    default_classifier_arguments.update(kwargs)
    self.classifier = LogisticRegressionCV(
        *args, **default_classifier_arguments
    )
    self.classifier.fit(self.fit_X, self.fit_y)
def logistic_fidelity(self):
    # group data and assign state labels
    gnd_features = np.hstack([np.real(self.ground_data.T),
                              np.imag(self.ground_data.T)])
    ex_features = np.hstack([np.real(self.excited_data.T),
                             np.imag(self.excited_data.T)])
    # liblinear wants arrays in C order
    features = np.ascontiguousarray(np.vstack([gnd_features, ex_features]))
    state = np.ascontiguousarray(np.hstack([np.zeros(self.ground_data.shape[1]),
                                            np.ones(self.excited_data.shape[1])]))
    # Set up logistic regression with cross-validation using liblinear.
    # Cs sets the inverse of the regularization strength, which will be
    # optimized through cross-validation. Uses the default Stratified
    # K-Folds CV generator, with 3 folds.
    # This is set up to be as consistent with the MATLAB implementation
    # as I can make it. --GJR
    Cs = np.logspace(-1, 2, 5)
    logreg = LogisticRegressionCV(Cs, cv=3, solver='liblinear')
    logreg.fit(features, state)  # fit the model
    predictions = logreg.predict(features)  # in-place classification
    score = logreg.score(features, state)  # mean accuracy of classification
    N = len(predictions)
    S = np.sum(predictions == state)  # how many we got right
    # now calculate confidence intervals
    c = 0.95
    flo = betaincinv(S + 1, N - S + 1, (1 - c) / 2.)
    fhi = betaincinv(S + 1, N - S + 1, (1 + c) / 2.)
    logger.info(("In-place logistic regression fidelity: " +
                 "{:.2f}% ({:.2f}, {:.2f})".format(100 * score,
                                                   100 * flo, 100 * fhi)))
def build_logistic_RegressionCV(x_train, y_train):
    lr_cv_model = LogisticRegressionCV(n_jobs=-1, random_state=42, Cs=3,
                                       cv=10, refit=True,
                                       class_weight="balanced")
    lr_cv_model.fit(x_train, y_train)
    return lr_cv_model
def feature_selection_logit(xtr, ytr):
    # L1-penalized logistic regression drives uninformative coefficients
    # to zero; keep only the columns with nonzero weights.
    model = LogisticRegressionCV(penalty='l1', solver='liblinear', cv=5)
    model.fit(xtr, ytr)
    columns = np.arange(xtr.shape[1])[~np.isclose(model.coef_.ravel(), 0)]
    return columns
def sklearn_logit(self, Xtrain, ytrain, Xtest, ytest):
    clf = linear_model.LogisticRegressionCV(penalty='l2',
                                            class_weight='balanced',
                                            intercept_scaling=1e3, cv=5)
    clf.fit(Xtrain, ytrain)
    coefficients = clf.coef_
    print("coefficients:", coefficients)
    print("intercept:", clf.intercept_)

    # predict train labels
    train_predictions = clf.predict(Xtrain)
    train_accuracy = self.calculate_accuracy(train_predictions, ytrain)
    print("train accuracy: ", train_accuracy * 100)
    MSE_train = self.calculate_MSE(train_predictions, ytrain)
    print("train MSE: ", MSE_train)
    p = Xtrain.shape[1]  # number of predictors; undefined in the original snippet
    AIC_train = len(ytrain) * np.log(MSE_train) + 2 * (p + 1)
    print("train AIC:", AIC_train)
    for i in range(len(train_predictions)):
        train_predictions[i] = round(train_predictions[i])
    train_confMatrix = confusion_matrix(ytrain, train_predictions,
                                        labels=[1.0, 0.0])
    print("train confusion matrix:", train_confMatrix)

    # predict test labels
    test_predictions = clf.predict(Xtest)
    test_accuracy = self.calculate_accuracy(test_predictions, ytest)
    print("test accuracy: ", test_accuracy * 100)
    MSE_test = self.calculate_MSE(test_predictions, ytest)
    print("test MSE: ", MSE_test)
    for i in range(len(test_predictions)):
        test_predictions[i] = round(test_predictions[i])
    test_confMatrix = confusion_matrix(ytest, test_predictions,
                                       labels=[1.0, 0.0])
    print("test confusion matrix:", test_confMatrix)
def __init__(self, bootstrap_fraction, random_seed=None,
             feature_importance_metric=None,
             feature_importance_threshold=None, **kwargs):
    self.Cs = kwargs.get('Cs', 10)
    self.fit_intercept = kwargs.get('fit_intercept', True)
    self.cv = kwargs.get('cv', None)
    self.dual = kwargs.get('dual', False)
    self.scoring = kwargs.get('scoring', None)
    self.tol = kwargs.get('tol', 1e-4)
    self.max_iter = kwargs.get('max_iter', 100)
    self.class_weight = kwargs.get('class_weight', None)
    self.n_jobs = kwargs.get('n_jobs', 1)
    self.verbose = kwargs.get('verbose', 0)
    self.refit = kwargs.get('refit', True)
    self.intercept_scaling = kwargs.get('intercept_scaling', 1.0)
    self.multi_class = kwargs.get('multi_class', 'ovr')
    self.random_state = kwargs.get('random_state', None)

    # The penalty and solver are changed from their defaults since we
    # want to induce sparsity in the final feature set of Bolasso.
    # The liblinear solver is required for the 'l1' penalty.
    self.logit = LogisticRegressionCV(
        Cs=self.Cs,
        fit_intercept=self.fit_intercept,
        cv=self.cv,
        dual=self.dual,
        penalty='l1',
        scoring=self.scoring,
        solver='liblinear',
        tol=self.tol,
        max_iter=self.max_iter,
        class_weight=self.class_weight,
        n_jobs=self.n_jobs,
        verbose=self.verbose,
        refit=self.refit,
        intercept_scaling=self.intercept_scaling,
        multi_class=self.multi_class,
        random_state=self.random_state
    )
    super(Bolasso, self).__init__(
        bootstrap_fraction, self.logit,
        random_seed=random_seed,
        feature_importance_metric=feature_importance_metric,
        feature_importance_threshold=feature_importance_threshold)
def one_set(A, y, cv, final_model, names, feature_names, results_dir,
            train_func=None, predict_func=None, baseline=None):
    log.info("Starting {} analysis.".format(results_dir))

    # create storage directory
    if not os.path.exists(results_dir):
        os.makedirs(results_dir)

    fpr_array, tpr_array, thresh_array, oob_estimates = validation.compute_cv(
        cv, final_model, A, y, train_func, predict_func)

    log.info("Building storage record.")
    result = validation.create_record(final_model, y, cv, names, fpr_array,
                                      tpr_array, thresh_array, oob_estimates)
    try:
        # if logistic regression, get feature weights
        if 'logitreg' in final_model.named_steps:
            logitreg = final_model.named_steps['logitreg']
            logit_out = {}
            logit_out['lambda'] = (1.0 / logitreg.Cs_).tolist()
            logit_out['lambda_best'] = (1.0 / logitreg.C_).tolist()[0]

            # now get the indices of the features kept by the 'empty' step
            valid_idx = final_model.named_steps['empty'].get_important_indicies()
            ordered = zip(valid_idx, logitreg.coef_.ravel())
            ordered = sorted(ordered, key=lambda o: -np.abs(o[1]))
            out_dict = []
            max_value = np.abs(ordered[0][1])
            for idx, value in ordered:
                if max_value * 1.e-6 > np.abs(value):
                    break
                out_dict.append({'name': feature_names[idx], 'value': value})
            logit_out['type'] = 'LogisticRegressionCV'
            logit_out['nnz'] = len(out_dict)
            logit_out['weights'] = out_dict
            logit_out['offset'] = logitreg.intercept_[0]

            # store the result
            result['model'] = logit_out
    except:
        tb = traceback.format_exc()
        log.error(tb)

    log.info('Created results.')
    path = validation.store_record(result, results_dir, 'full_time', False)
    log.info('Stored results to directory %s.' % (str(path)))
    log.info("Finished!")