我们从Python开源项目中，提取了以下17个代码示例，用于说明如何使用sklearn.linear_model.PassiveAggressiveClassifier()。
def passive_aggressive_train(self):
    """Train a passive-aggressive classifier and attach a score-to-probability map.

    The classifier is fit on ``self._term_doc_matrix._X`` / ``._y`` and stored
    in ``self._clf``.  The decision-function values of the training rows are
    then split by sign and an empirical CDF is built for each side; the
    resulting closure is stored in ``self._proba``.

    The closure maps a signed distance from the hyperplane to a value in
    ``[0, 1]``:

    * positive distance -> ``0.5 + pos_ecdf(d) / 2``
    * negative distance -> ``neg_ecdf(d) / 2``
    * exactly zero      -> ``0.5``

    Returns:
        ``self``, so calls can be chained.
    """
    self._clf = PassiveAggressiveClassifier(n_iter=50, C=0.2, n_jobs=-1, random_state=0)
    self._clf.fit(self._term_doc_matrix._X, self._term_doc_matrix._y)

    y_dist = self._clf.decision_function(self._term_doc_matrix._X)
    pos_ecdf = ECDF(y_dist[y_dist >= 0])
    neg_ecdf = ECDF(y_dist[y_dist <= 0])

    def proba_function(distance_from_hyperplane):
        if distance_from_hyperplane > 0:
            return pos_ecdf(distance_from_hyperplane) / 2. + 0.5
        elif distance_from_hyperplane < 0:
            # Use the ECDF built from the non-positive distances.  The
            # previous code evaluated pos_ecdf here, which is built only from
            # non-negative samples and therefore carries no information about
            # a negative distance (assuming ECDF is a standard empirical CDF).
            return neg_ecdf(distance_from_hyperplane) / 2.
        return 0.5

    self._proba = proba_function
    return self
def test_main(self):
    """Smoke-test fitting a PassiveAggressiveClassifier on tf-idf features.

    Builds a term-document matrix from the test corpus (censoring ``GPE``
    entities and blanking documents that start with ``[``), fits the
    classifier on L1-normalised tf-idf features, then featurises a single
    unseen sentence and runs ``predict`` / ``decision_function`` on it.
    No assertions are made on the results in the visible code.
    """
    categories, documents = get_docs_categories()
    clean_function = lambda text: '' if text.startswith('[') else text
    entity_types = {'GPE'}

    factory = TermDocMatrixFactory(
        category_text_iter=zip(categories, documents),
        clean_function=clean_function,
        nlp=_testing_nlp,
        feats_from_spacy_doc=FeatsFromSpacyDoc(entity_types_to_censor=entity_types),
    )
    term_doc_mat = factory.build()

    classifier = PassiveAggressiveClassifier(n_iter=5, C=0.5, n_jobs=-1, random_state=0)

    doc_featurizer = FeatsFromDoc(
        term_doc_mat._term_idx_store,
        clean_function=clean_function,
        feats_from_spacy_doc=FeatsFromSpacyDoc(entity_types_to_censor=entity_types),
    )
    doc_featurizer = doc_featurizer.set_nlp(_testing_nlp)

    tfidf_transformer = TfidfTransformer(norm='l1')
    train_features = tfidf_transformer.fit_transform(term_doc_mat._X)
    classifier.fit(train_features, term_doc_mat._y)

    X_to_predict = doc_featurizer.feats_from_doc('Did sometimes march UNKNOWNWORD')
    predicted_label = classifier.predict(tfidf_transformer.transform(X_to_predict))
    # NOTE(review): decision_function receives the raw features while predict
    # receives the tf-idf transformed ones -- confirm this is intended.
    decision_scores = classifier.decision_function(X_to_predict)
def test_learning_curve_batch_and_incremental_learning_are_equal():
    """Learning curves must agree with and without incremental learning.

    Runs ``learning_curve`` twice on the same small synthetic problem, once
    with ``exploit_incremental_learning=True`` and once with ``False``, and
    checks that the train sizes match exactly and the mean train/test scores
    match approximately.
    """
    features, labels = make_classification(
        n_samples=30, n_features=1, n_informative=1, n_redundant=0,
        n_classes=2, n_clusters_per_class=1, random_state=0)
    train_sizes = np.linspace(0.2, 1.0, 5)
    estimator = PassiveAggressiveClassifier(n_iter=1, shuffle=False)

    # Settings shared by both runs; only the incremental flag differs.
    shared_kwargs = dict(train_sizes=train_sizes, cv=3)

    sizes_inc, train_inc, test_inc = learning_curve(
        estimator, features, labels,
        exploit_incremental_learning=True, **shared_kwargs)
    sizes_batch, train_batch, test_batch = learning_curve(
        estimator, features, labels,
        exploit_incremental_learning=False, **shared_kwargs)

    assert_array_equal(sizes_inc, sizes_batch)
    assert_array_almost_equal(train_inc.mean(axis=1),
                              train_batch.mean(axis=1))
    assert_array_almost_equal(test_inc.mean(axis=1),
                              test_batch.mean(axis=1))
def test_class_weights():
    """Check that a class weight changes the prediction for a probe point.

    Without class weights the probe ``[0.2, -1.0]`` is expected to be
    labelled ``1``; after giving class ``1`` a very small weight the same
    probe is expected to be labelled ``-1``.
    """
    samples = np.array([[-1.0, -1.0], [-1.0, 0], [-0.8, -1.0],
                        [1.0, 1.0], [1.0, 0.0]])
    targets = [1, 1, 1, -1, -1]
    probe = [[0.2, -1.0]]

    unweighted = PassiveAggressiveClassifier(C=0.1, n_iter=100,
                                             class_weight=None,
                                             random_state=100)
    unweighted.fit(samples, targets)
    assert_array_equal(unweighted.predict(probe), np.array([1]))

    # Give class 1 a tiny weight: the hyperplane should rotate clockwise
    # and the prediction for the probe should flip.
    down_weighted = PassiveAggressiveClassifier(C=0.1, n_iter=100,
                                                class_weight={1: 0.001},
                                                random_state=100)
    down_weighted.fit(samples, targets)
    assert_array_equal(down_weighted.predict(probe), np.array([-1]))
def test_equal_class_weight():
    """Equal or "balanced" class weights should not change the coefficients.

    The data set is already balanced, so ``class_weight=None``,
    ``class_weight="balanced"`` and an explicit equal-weight dict are all
    expected to produce coefficients that agree to two decimals.
    """
    samples = [[1, 0], [1, 0], [0, 1], [0, 1]]
    targets = [0, 0, 1, 1]

    baseline = PassiveAggressiveClassifier(C=0.1, n_iter=1000,
                                           class_weight=None)
    baseline.fit(samples, targets)

    # Already balanced, so "balanced" weights should have no effect.
    balanced = PassiveAggressiveClassifier(C=0.1, n_iter=1000,
                                           class_weight="balanced")
    balanced.fit(samples, targets)

    explicit = PassiveAggressiveClassifier(C=0.1, n_iter=1000,
                                           class_weight={0: 0.5, 1: 0.5})
    explicit.fit(samples, targets)

    # Should be similar up to some epsilon due to the learning rate schedule.
    assert_almost_equal(baseline.coef_, explicit.coef_, decimal=2)
    assert_almost_equal(baseline.coef_, balanced.coef_, decimal=2)
def test_basic(self, single_chunk_classification):
    """Compare the ``lm`` partial wrapper against ``lm_``'s ``partial_fit``.

    The wrapper is trained through ``fit`` (classes given at construction),
    the reference estimator through ``partial_fit`` (classes given at call
    time); the two are then compared, ignoring ``loss_function_``.
    """
    X, y = single_chunk_classification

    # Hyper-parameters shared by both estimators.
    common = dict(random_state=0, max_iter=100, tol=1e-3)

    wrapped = lm.PartialPassiveAggressiveClassifier(classes=[0, 1], **common)
    reference = lm_.PassiveAggressiveClassifier(**common)

    wrapped.fit(X, y)
    reference.partial_fit(X, y, classes=[0, 1])

    assert_estimator_equal(wrapped, reference, exclude=['loss_function_'])
def run_cat(filename, modelname, fileout, embeddings, new_run=True, run_parse=True,
            model_type='logreg', C=10.0, alpha=1.0, cutoff=0.50, n_iter=1):
    """Categorise the rows of a CSV file and persist both results and model.

    Args:
        filename: Path of the input CSV, read with ``pd.read_csv``.  If it has
            exactly two columns they are renamed to ``raw`` and ``amount``.
        modelname: Path of the pickled model.  Read when ``new_run`` is false;
            always (over)written at the end of the run.
        fileout: Path the resulting data frame is written to as CSV.
        embeddings: Forwarded unchanged to ``cat_df``.
        new_run: When true a fresh model is created according to
            ``model_type``; otherwise the model is unpickled from ``modelname``.
        run_parse: Forwarded unchanged to ``cat_df``.
        model_type: ``'logreg'``, ``'passive-aggressive'`` or ``'naive-bayes'``.
        C: ``C`` for the passive-aggressive classifier.
        alpha: ``alpha`` for the SGD (logistic regression) classifier.
        cutoff: Forwarded to ``cat_df``.
        n_iter: ``n_iter`` for the SGD (logistic regression) classifier.

    Raises:
        NameError: If ``new_run`` is true and ``model_type`` is not one of the
            supported values.
    """
    # pull relevant data and run parsing and classification
    df = pd.read_csv(filename)
    if len(df.columns) == 2:  # make sure columns have the right names
        df.columns = ['raw', 'amount']

    if new_run:  # initialize the model
        if model_type == 'logreg':
            model = linear_model.SGDClassifier(loss='log', warm_start=True,
                                               n_iter=n_iter, alpha=alpha)
        elif model_type == 'passive-aggressive':
            model = linear_model.PassiveAggressiveClassifier(C=C, warm_start=True)
        elif model_type == 'naive-bayes':
            model = naive_bayes.GaussianNB()
        else:
            raise NameError('model_type must be logreg, passive-aggressive, or naive-bayes')
    else:  # load a saved, pre-trained model
        # NOTE(review): unpickling executes code embedded in the file; only
        # load model files that come from a trusted source.
        # The context manager closes the handle, which was previously leaked.
        with open(modelname, 'rb') as model_file_load:
            model = pickle.load(model_file_load)

    fileCities = dirs.data_dir + 'cities_by_state.pickle'
    us_cities = pd.read_pickle(fileCities)

    df = cat_df(df, model, us_cities, embeddings, new_run, run_parse, cutoff=cutoff,
                model_type=model_type)

    df.to_csv(fileout, index=False)

    # Persist the model back to ``modelname``; ``with`` guarantees the file is
    # closed even if pickling raises.
    with open(modelname, 'wb') as model_file_save:
        pickle.dump(model, model_file_save)


# ------ testing functions
def generate_base_classification():
    """Return the base classification models with their search parameters.

    Returns:
        A list of ``(estimator_class, search_params)`` tuples, where
        ``search_params`` is whatever the ``params`` helper returns for the
        named hyper-parameters.  Entries that are commented out are currently
        disabled.
    """
    from sklearn.svm import LinearSVC, NuSVC, SVC
    from sklearn.tree import ExtraTreeClassifier, DecisionTreeClassifier
    from sklearn.ensemble import RandomForestClassifier
    from sklearn.gaussian_process import GaussianProcessClassifier
    from sklearn.linear_model import LogisticRegression, PassiveAggressiveClassifier, RidgeClassifier, SGDClassifier
    from sklearn.neighbors import KNeighborsClassifier
    from sklearn.naive_bayes import MultinomialNB, GaussianNB, BernoulliNB

    # Extend the KNN parameters as a separate statement: ``update`` mutates in
    # place and (for a dict) returns None, so using the call itself as the
    # tuple element silently dropped every KNN parameter.
    # NOTE(review): assumes ``params`` returns a mutable mapping -- confirm.
    knn_params = params('n_neighbors', 'leaf_size', 'p')
    knn_params.update({
        'algorithm': ['auto', 'brute', 'kd_tree', 'ball_tree']
    })

    models = [
        #(LinearSVC, params('C', 'loss')),
        # (NuSVC, params('nu', 'kernel', 'degree')),
        #(SVC, params('C', 'kernel')),
        #(ExtraTreeClassifier, params('criterion', 'min_samples_split', 'min_samples_leaf')),
        (DecisionTreeClassifier, params('criterion', 'min_samples_split', 'min_samples_leaf')),
        (RandomForestClassifier, params('criterion', 'min_samples_split', 'min_samples_leaf', 'n_estimators')),
        #(GaussianProcessClassifier, None),
        (LogisticRegression, params('C', 'penalty')),
        #(PassiveAggressiveClassifier, params('C', 'loss')),
        #(RidgeClassifier, params('alpha')),
        # we do in-place modification of what the method params return in order to add
        # more loss functions that weren't defined in the method
        # NOTE(review): if re-enabled, the line below has the same problem --
        # ``list.extend`` returns None, so do the extend in a separate statement.
        #(SGDClassifier, params('loss', 'penalty', 'alpha')['loss'].extend(['log', 'modified_huber'])),
        (KNeighborsClassifier, knn_params),
        (MultinomialNB, params('alpha')),
        #(GaussianNB, None),
        #(BernoulliNB, params('alpha'))
    ]
    return models
def test_classifier_accuracy():
    """Training accuracy must exceed 0.79 for every input/intercept combination.

    Exercises both ``X`` and ``X_csr`` inputs, each with and without a
    fitted intercept.
    """
    configurations = [(data, fit_intercept)
                      for data in (X, X_csr)
                      for fit_intercept in (True, False)]

    for data, fit_intercept in configurations:
        model = PassiveAggressiveClassifier(C=1.0, n_iter=30,
                                            fit_intercept=fit_intercept,
                                            random_state=0)
        model.fit(data, y)
        assert_greater(model.score(data, y), 0.79)
def test_classifier_partial_fit():
    """Thirty ``partial_fit`` passes must reach a training accuracy above 0.79.

    Checked for both the ``X`` and ``X_csr`` inputs.
    """
    n_passes = 30
    classes = np.unique(y)

    for data in (X, X_csr):
        model = PassiveAggressiveClassifier(C=1.0, fit_intercept=True,
                                            random_state=0)
        for _ in range(n_passes):
            model.partial_fit(data, y, classes)
        assert_greater(model.score(data, y), 0.79)
def test_classifier_refit():
    """A fitted classifier can be refit on different labels and features.

    After the second ``fit`` (one feature column dropped, labels replaced by
    the entries of ``iris.target_names``) ``classes_`` must reflect the new
    labels.
    """
    model = PassiveAggressiveClassifier()
    model = model.fit(X, y)
    assert_array_equal(model.classes_, np.unique(y))

    fewer_features = X[:, :-1]
    named_labels = iris.target_names[y]
    model.fit(fewer_features, named_labels)
    assert_array_equal(model.classes_, iris.target_names)
def test_classifier_undefined_methods():
    """Accessing unsupported methods on the classifier raises AttributeError."""
    model = PassiveAggressiveClassifier()
    unsupported = ("predict_proba", "predict_log_proba", "transform")
    for method_name in unsupported:
        # Attribute lookup itself is expected to fail, not the call.
        assert_raises(AttributeError, getattr, model, method_name)
def test_partial_fit_weight_class_balanced():
    """``partial_fit`` with ``class_weight='balanced'`` must raise ValueError."""
    # partial_fit with class_weight='balanced' is not supported.
    balanced_model = PassiveAggressiveClassifier(class_weight="balanced")
    assert_raises(ValueError, balanced_model.partial_fit,
                  X, y, classes=np.unique(y))
def test_wrong_class_weight_label():
    """Fitting with a class_weight key absent from the labels raises ValueError.

    The labels are ``1`` and ``-1`` while the weight dict only mentions ``0``.
    """
    samples = np.array([[-1.0, -1.0], [-1.0, 0], [-0.8, -1.0],
                        [1.0, 1.0], [1.0, 0.0]])
    targets = [1, 1, 1, -1, -1]

    mislabelled = PassiveAggressiveClassifier(class_weight={0: 0.5})
    assert_raises(ValueError, mislabelled.fit, samples, targets)
def test_partial_fit():
    """``SelectFromModel.partial_fit`` must keep reusing the same estimator.

    Also checks that the transform obtained after two ``partial_fit`` calls
    equals the transform obtained after a single ``fit`` on the data stacked
    twice.
    """
    base = PassiveAggressiveClassifier(random_state=0, shuffle=False)
    selector = SelectFromModel(estimator=base)

    selector.partial_fit(data, y, classes=np.unique(y))
    model_after_first = selector.estimator_
    selector.partial_fit(data, y, classes=np.unique(y))
    model_after_second = selector.estimator_
    assert_true(model_after_first is model_after_second)

    transformed_incremental = selector.transform(data)

    doubled_data = np.vstack((data, data))
    doubled_labels = np.concatenate((y, y))
    selector.fit(doubled_data, doubled_labels)
    assert_array_equal(transformed_incremental, selector.transform(data))
def test_warm_start():
    """With ``warm_start=True`` a second ``fit`` must reuse the same estimator."""
    base = PassiveAggressiveClassifier(warm_start=True, random_state=0)
    selector = SelectFromModel(estimator=base)

    selector.fit(data, y)
    model_after_first = selector.estimator_

    selector.fit(data, y)
    model_after_second = selector.estimator_

    assert_true(model_after_first is model_after_second)