我们从 Python 开源项目中提取了以下 50 个代码示例,用于说明如何使用 sklearn.svm.LinearSVC()。
def __init__(self, filename, target_map, classifier='svm'):
    """Initialise bookkeeping state and the classifier with its hyper-parameter grid.

    Args:
        filename: Stored as ``self.filename_``; not used further in this method.
        target_map: Mapping whose keys are target ids. The unique keys are
            stored as an ``int32`` array in ``self.target_ids_``.
        classifier: ``'svm'`` for a ``LinearSVC`` or ``'sgd'`` for an
            ``SGDClassifier``.

    Raises:
        Exception: If ``classifier`` is neither ``'svm'`` nor ``'sgd'``.
    """
    self.seed_ = 0
    self.filename_ = filename
    self.target_map_ = target_map
    # Materialise the keys first: on Python 3 ``dict.keys()`` is a view, which
    # NumPy would wrap as one opaque object instead of an array of ids.
    self.target_ids_ = np.unique(list(target_map.keys())).astype(np.int32)
    self.epoch_no_ = 0
    self.st_time_ = time.time()

    # Setup classifier
    print('-------------------------------')
    print('====> Building Classifier, setting class weights')
    if classifier == 'svm':
        self.clf_hyparams_ = {'C': [0.01, 0.1, 1.0, 10.0, 100.0],
                              'class_weight': ['balanced']}
        self.clf_base_ = LinearSVC(random_state=self.seed_)
    elif classifier == 'sgd':
        # NOTE(review): 'auto' is the legacy spelling; the 'svm' branch already
        # uses 'balanced' -- confirm which scikit-learn version is targeted.
        self.clf_hyparams_ = {'alpha': [0.0001, 0.001, 0.01, 0.1, 1.0, 10.0],
                              'class_weight': ['auto']}  # 'loss':['hinge'],
        # NOTE(review): this branch sets ``clf_`` whereas the 'svm' branch sets
        # ``clf_base_`` -- confirm that the asymmetry is intended.
        self.clf_ = SGDClassifier(loss='log', penalty='l2', shuffle=False,
                                  random_state=self.seed_, warm_start=True,
                                  n_jobs=-1, n_iter=1, verbose=4)
    else:
        # Only list the choices this method actually accepts.
        raise Exception('Unknown classifier type %s. Choose from [sgd, svm]' % classifier)
def __init__(self, cls, dim, feature_scale=1.0, C=0.001, B=10.0, pos_weight=2.0):
    """Create an empty positive/negative feature cache and the linear SVM.

    Args:
        cls: Stored as ``self.cls``.
        dim: Feature dimensionality; the positive and negative caches start as
            ``(0, dim)`` float32 arrays.
        feature_scale: Stored as ``self.feature_scale``.
        C: Regularisation parameter passed to ``LinearSVC``.
        B: Passed to ``LinearSVC`` as ``intercept_scaling``.
        pos_weight: Class weight applied to the positive class (label ``1``);
            the negative class (label ``-1``) keeps weight 1.
    """
    self.pos = np.zeros((0, dim), dtype=np.float32)
    self.neg = np.zeros((0, dim), dtype=np.float32)
    self.B = B
    self.C = C
    self.cls = cls
    self.pos_weight = pos_weight
    self.dim = dim
    self.feature_scale = feature_scale
    # Use the pos_weight argument instead of a hard-coded 2 (the default keeps
    # the previous behaviour). 'hinge' is the current name of the loss that
    # used to be spelled 'l1'.
    self.svm = svm.LinearSVC(C=C, class_weight={1: pos_weight, -1: 1},
                             intercept_scaling=B, verbose=1,
                             penalty='l2', loss='hinge',
                             random_state=cfg.RNG_SEED, dual=True)
    self.pos_cur = 0
    self.num_neg_added = 0
    self.retrain_limit = 2000
    self.evict_thresh = -1.1
    self.loss_history = []
def make_classification_example(axis, random_state):
    """Draw a two-class toy dataset and its LinearSVC decision line on ``axis``.

    Args:
        axis: Matplotlib axes object to draw on.
        random_state: Seed forwarded to ``make_blobs``.
    """
    X, y = make_blobs(n_samples=100, n_features=2, centers=2,
                      cluster_std=2.7, random_state=random_state)

    axis.scatter(X[y == 0, 0], X[y == 0, 1], color="red", s=10, label="Disease")
    axis.scatter(X[y == 1, 0], X[y == 1, 1], color="blue", s=10, label="Healthy")

    clf = LinearSVC().fit(X, y)

    # get the separating hyperplane: w0*x + w1*y + b = 0  =>  y = a*x - b/w1
    w = clf.coef_[0]
    a = -w[0] / w[1]
    xx = np.linspace(-5, 7)
    yy = a * xx - (clf.intercept_[0]) / w[1]

    # plot the decision line ('k-' already means solid black)
    axis.plot(xx, yy, 'k-', label="Model")

    # Decorate the axes that was passed in; the original referred to an
    # unrelated name ``ax1``. Booleans replace the legacy 'off' strings.
    axis.tick_params(labelbottom=False, labelleft=False)
    axis.set_xlabel("Gene 1")
    axis.set_ylabel("Gene 2")
    axis.legend()
def define_model(self, model, parameters, n_cores = 0):
    """Build the estimator registered under ``model`` and apply ``parameters``.

    Args:
        model: Key naming the estimator (e.g. ``'RandomForestClassifier'``,
            ``'linear.SVC'``).
        parameters: Dict forwarded to the estimator's ``set_params``.
        n_cores: Accepted for interface compatibility; not used here.
            NOTE(review): the ``n_jobs`` values below are fixed at 7 --
            confirm whether ``n_cores`` was meant to drive them.

    Returns:
        The configured, unfitted estimator.

    Raises:
        ConfigError: If ``model`` is not a supported key.
    """
    # Factories keep construction lazy: only the requested estimator is built,
    # and an unknown key is rejected before anything is instantiated.
    factories = {
        'RandomForestClassifier': lambda: RandomForestClassifier(n_estimators=50, n_jobs=7),
        'ExtraTreesClassifier': lambda: ExtraTreesClassifier(n_estimators=10, n_jobs=7, criterion='entropy'),
        'AdaBoostClassifier': lambda: AdaBoostClassifier(DecisionTreeClassifier(max_depth=1), algorithm="SAMME", n_estimators=200),
        'LogisticRegression': lambda: LogisticRegression(penalty='l1', C=1e5),
        'svm.SVC': lambda: svm.SVC(kernel='linear', probability=True, random_state=0),
        'GradientBoostingClassifier': lambda: GradientBoostingClassifier(learning_rate=0.05, subsample=0.5, max_depth=6, n_estimators=10),
        'GaussianNB': lambda: GaussianNB(),
        'DecisionTreeClassifier': lambda: DecisionTreeClassifier(),
        'SGDClassifier': lambda: SGDClassifier(loss="hinge", penalty="l2", n_jobs=7),
        'KNeighborsClassifier': lambda: KNeighborsClassifier(n_neighbors=3),
        'linear.SVC': lambda: svm.LinearSVC(),
    }

    if model not in factories:
        raise ConfigError("Unsupported model {}".format(model))

    clf = factories[model]()
    clf.set_params(**parameters)
    return clf
def do_ml(ticker):
    """Train a voting ensemble on the feature sets of ``ticker`` and report accuracy.

    The data returned by ``extract_featuresets`` is split 75/25, a
    ``VotingClassifier`` (linear SVC, k-NN, random forest) is fitted on the
    training part, and the held-out accuracy plus predicted class counts are
    printed.

    Returns:
        The accuracy of the ensemble on the held-out split.
    """
    features, labels, _frame = extract_featuresets(ticker)

    split = cross_validation.train_test_split(features, labels, test_size=0.25)
    train_x, test_x, train_y, test_y = split

    voters = [
        ('lsvc', svm.LinearSVC()),
        ('knn', neighbors.KNeighborsClassifier()),
        ('rfor', RandomForestClassifier()),
    ]
    ensemble = VotingClassifier(voters)
    ensemble.fit(train_x, train_y)

    accuracy = ensemble.score(test_x, test_y)
    print('accuracy:', accuracy)
    print('predicted class counts:', Counter(ensemble.predict(test_x)))
    print()
    print()
    return accuracy
# examples of running:
def convert(model, feature_names, target):
    """Convert a LinearSVC model to the protobuf spec.

    Parameters
    ----------
    model: LinearSVC
        A trained LinearSVC model.

    feature_names: [str]
        Name of the input columns.

    target: str
        Name of the output column.

    Returns
    -------
    model_spec: An object of type Model_pb.
        Protobuf representation of the model

    Raises
    ------
    RuntimeError
        If scikit-learn is not available.
    """
    if not _HAS_SKLEARN:
        raise RuntimeError('scikit-learn not found. scikit-learn conversion API is disabled.')

    def _has_coefficients(candidate):
        # A fitted linear model exposes its learned weights as ``coef_``.
        return hasattr(candidate, 'coef_')

    _sklearn_util.check_expected_type(model, _LinearSVC)
    _sklearn_util.check_fitted(model, _has_coefficients)

    spec = _logistic_regression._convert(model, feature_names, target)
    return _MLModel(spec)
def __init__(self, a_clf=None, a_grid_search=False):
    """Class constructor.

    Initialize classifier.

    Args:
      a_clf (classifier or None):
        classifier to use or None for default
      a_grid_search (bool): use grid search for estimating hyper-parameters

    """
    # Compare against None explicitly. Truth-testing an estimator (``a_clf or
    # ...``) invokes ``__len__`` when the estimator defines one, which can
    # silently discard a valid classifier or raise on an unfitted one.
    if a_clf is None:
        classifier = LinearSVC(C=DFLT_C, **DFLT_PARAMS)
    else:
        classifier = a_clf
    self._gs = a_grid_search
    self._model = Pipeline([("vect", DictVectorizer()),
                            ("clf", classifier)])
def __init__(self, isTrain, isOutlierRemoval=0):
    """Initialise the base class, preprocess the data and create the SVM variants.

    Four classifiers sharing the same regularisation strength are created:
    a linear-kernel ``SVC``, an RBF ``SVC``, a polynomial ``SVC`` and a
    ``LinearSVC``; ``self.clf`` holds a default ``SVC``.

    Note on the two linear models: ``LinearSVC()`` and ``SVC(kernel='linear')``
    yield slightly different decision boundaries because

    - ``LinearSVC`` minimizes the squared hinge loss while ``SVC`` minimizes
      the regular hinge loss;
    - ``LinearSVC`` uses the One-vs-All (One-vs-Rest) multiclass reduction
      while ``SVC`` uses the One-vs-One multiclass reduction.

    :param isTrain: forwarded to the base-class constructor
    :param isOutlierRemoval: forwarded to the base-class constructor
    """
    super(ClassificationSVM, self).__init__(isTrain, isOutlierRemoval)

    # data preprocessing
    self.dataPreprocessing()

    self.clf = svm.SVC()

    # one regularisation strength shared by every variant below
    penalty = 1.0
    self.svc = svm.SVC(kernel='linear', C=penalty, max_iter=100000)
    self.rbf_svc = svm.SVC(kernel='rbf', gamma=0.7, C=penalty)
    self.poly_svc = svm.SVC(kernel='poly', coef0=1, degree=3, C=penalty)
    self.lin_svc = svm.LinearSVC(C=penalty)
def get_classifier(self):
    """Return a fresh estimator for the algorithm code stored in ``self.algo``.

    Recognised codes: GBT, RF, ADB, DT, NB, SGD, SVC and MLPC.
    Any other value yields ``0``.
    """
    code = self.algo

    # Each branch returns immediately, so plain guards replace the elif chain.
    if code == "GBT":
        return GradientBoostingClassifier()
    if code == "RF":
        return RandomForestClassifier()
    if code == "ADB":
        return AdaBoostClassifier()
    if code == "DT":
        return DecisionTreeClassifier()
    if code == "NB":
        return BernoulliNB()
    if code == "SGD":
        return SGDClassifier()
    if code == "SVC":
        return LinearSVC()
    if code == "MLPC":
        mlp_options = dict(
            activation='logistic',
            batch_size='auto',
            early_stopping=True,
            hidden_layer_sizes=(100,),
            learning_rate='adaptive',
            learning_rate_init=0.1,
            max_iter=5000,
            random_state=1,
            solver='lbfgs',
            tol=0.0001,
            validation_fraction=0.1,
            verbose=False,
            warm_start=False,
        )
        return MLPClassifier(**mlp_options)

    # unknown algorithm code
    return 0
def test_classes__property():
    """Check when ``GridSearchCV`` exposes a ``classes_`` attribute."""
    X = np.arange(100).reshape(10, 10)
    y = np.array([0] * 5 + [1] * 5)
    Cs = [.1, 1, 10]

    def svc_search(**options):
        # Fresh grid search over C for a seeded LinearSVC.
        return dcv.GridSearchCV(LinearSVC(random_state=0), {'C': Cs}, **options)

    # classes_ mirrors best_estimator_.classes_ once fitted
    fitted = svc_search()
    fitted.fit(X, y)
    assert_array_equal(fitted.best_estimator_.classes_, fitted.classes_)

    # regressors do not have a classes_ attribute
    ridge_search = dcv.GridSearchCV(Ridge(), {'alpha': [1.0, 2.0]})
    ridge_search.fit(X, y)
    assert not hasattr(ridge_search, 'classes_')

    # no classes_ attribute before the search is fit
    unfitted = svc_search()
    assert not hasattr(unfitted, 'classes_')

    # no classes_ attribute without a refit
    no_refit = svc_search(refit=False)
    no_refit.fit(X, y)
    assert not hasattr(no_refit, 'classes_')
def test_grid_search_sparse():
    """Grid search must behave the same on dense and sparse inputs."""
    features, labels = make_classification(n_samples=200, n_features=100,
                                           random_state=0)

    def run_search(train_x, test_x):
        # Fit on the first 180 labels, then return predictions and the chosen C.
        search = dcv.GridSearchCV(LinearSVC(), {'C': [0.1, 1.0]})
        search.fit(train_x, labels[:180])
        predictions = search.predict(test_x)
        return predictions, search.best_estimator_.C

    dense_pred, dense_C = run_search(features[:180], features[180:])

    sparse_features = sp.csr_matrix(features)
    sparse_pred, sparse_C = run_search(sparse_features[:180].tocoo(),
                                       sparse_features[180:])

    assert np.mean(dense_pred == sparse_pred) >= .9
    assert dense_C == sparse_C
def train_pumil_clf(bags, pidx, uidx, w, NL, learning_phase = False):
    """Train the PU multi-instance classifier on the Positive Margin Pool.

    Steps: pick the top-``NL`` reliable negative bags, estimate p(X|Y=-1)
    from them via weighted KDE, form the Positive Margin Pool (PMP), and fit
    a hinge-loss ``LinearSVC`` on the confidence-weighted PMP instances.

    Returns:
        The wrapped classifier, or ``(classifier, reliable_negative_indices)``
        when ``learning_phase`` is true.
    """
    # top-{NL} reliable negative bags
    reliable_neg = reliable_negative_bag_idx(bags, uidx, w, NL)
    negative_bags = [bags[j] for j in reliable_neg]

    # estimated p(X|Y=-1) via WKDE
    neg_density = weighted_kde(negative_bags, w[reliable_neg])

    # form Positive Margin Pool (PMP)
    pool_x, pool_y, pool_conf = form_pmp(bags, w, pidx, reliable_neg, neg_density)

    # scale every PMP instance by its confidence before training the SVM
    weighted_pool = np.multiply(pool_x.T, pool_conf).T
    linear_svm = svm.LinearSVC(loss = 'hinge')
    linear_svm.fit(weighted_pool, pool_y)

    wrapped = pumil_clf_wrapper(
        lambda x: float(linear_svm.decision_function(x)),
        neg_density,
        learning_phase,
    )

    return (wrapped, reliable_neg) if learning_phase else wrapped
def _conversion_and_evaluation_helper_for_linear_svc(self, class_labels):
    """Fit LinearSVC under several configurations and verify the converted spec.

    For each keyword-argument set a ``LinearSVC`` is trained on random data
    generated for ``class_labels``, converted with ``convert`` and evaluated
    with ``evaluate_classifier`` against the model's own predictions; the
    reported number of errors must be zero.

    Args:
        class_labels: Labels forwarded to the random data generator.
    """
    ARGS = [
        {},
        {'C' : .75, 'loss': 'hinge'},
        {'penalty': 'l1', 'dual': False},
        {'tol': 0.001, 'fit_intercept': False},
        {'intercept_scaling': 1.5},
    ]

    x, y = GlmCassifierTest._generate_random_data(class_labels)
    column_names = ['x1', 'x2']
    df = pd.DataFrame(x, columns=column_names)

    for cur_args in ARGS:
        print(class_labels, cur_args)

        cur_model = LinearSVC(**cur_args)
        cur_model.fit(x, y)

        spec = convert(cur_model, input_features=column_names,
                       output_feature_names='target')

        df['prediction'] = cur_model.predict(x)

        cur_eval_metics = evaluate_classifier(spec, df, verbose=False)
        # assertEqual replaces the deprecated assertEquals alias, which no
        # longer exists on Python 3.12+.
        self.assertEqual(cur_eval_metics['num_errors'], 0)
def svm_experiment(scope_name, X, y):
    """Run the LinearSVC label-split experiment and return the mean accuracy.

    For every value in the module-level ``lp_cand`` and each of 50 repetitions,
    the pickled train/test label dictionaries are loaded from
    ``data/local/split/<scope_name>/``, a ``LinearSVC(C=0.01)`` is fitted on
    the training rows and its accuracy on the test rows is recorded.

    Args:
        scope_name: Sub-directory name of the split files.
        X: Feature array indexed by the keys of the label dictionaries.
        y: Label array indexed the same way.

    Returns:
        Mean accuracy over the recorded repetitions.
    """
    for lp in lp_cand:
        results = []
        for r in range(50):
            split_prefix = ('data/local/split/' + scope_name + '/lb'
                            + str(lp).zfill(3) + '_' + str(r).zfill(3))

            # Pickle data is binary: open in 'rb' so loading works on Python 3
            # as well. Only load split files from a trusted location.
            with open(split_prefix + '_train', 'rb') as f:
                trainLabel = pk.load(f)
            with open(split_prefix + '_test', 'rb') as f:
                testLabel = pk.load(f)

            # dict views cannot index NumPy arrays on Python 3; use lists.
            train_idx = list(trainLabel.keys())
            test_idx = list(testLabel.keys())

            XTrain = X[train_idx]
            XTest = X[test_idx]
            yTrain = y[train_idx]
            yTest = y[test_idx]

            # train
            clf = LinearSVC(C=0.01)
            clf.fit(XTrain, yTrain)

            # test
            pred = clf.predict(XTest)
            results.append(sum(pred == yTest) / float(yTest.shape[0]))

    # NOTE(review): the nesting level of this return is ambiguous in the
    # flattened source; here it reports the mean for the last ``lp`` value.
    # Confirm against the original layout.
    return np.mean(results)
def fit(self, x, y):
    """Fit a linear SVM on features scaled by the log-count ratio.

    ``x`` is first converted with ``tfidf_to_counts``; the log-count ratio of
    the rows labelled 1 versus the rows labelled 0 is stored in ``self.r`` and
    multiplied into the features. The learned weights are stored in
    ``self.coef_``, their interpolation (with ``self.beta``) in
    ``self.int_coef_`` and the intercept in ``self.bias``.
    """
    # Convert non-binary features to binary
    counts = tfidf_to_counts(x)

    # Calculating the log-count ratio
    positive_rows = counts[np.where(y == 1)]
    negative_rows = counts[np.where(y == 0)]
    self.r = log_count_ratio(positive_rows, negative_rows)
    scaled = np.multiply(self.r, counts)

    # Training linear SVM with NB features but no interpolation
    linear_svm = LinearSVC(C=self.C)
    linear_svm.fit(scaled, y)

    self.coef_ = linear_svm.coef_
    self.int_coef_ = interpolate(self.coef_, self.beta)
    self.bias = linear_svm.intercept_
# Scores the interpolated model
def init_model():
    """Build the (unfitted) question classification pipeline.

    The pipeline unions two feature extractors -- a question-trunk vectorizer
    reduced with a 200-component truncated SVD, and a Word2Vec-based question
    vectorizer -- and feeds the result to ``LinearSVC(C=0.02)``.
    """
    # question "trunk" features
    trunk_vectorizer = QuestionTrunkVectorizer(tokenizer=tokenize)

    # Word2Vec features
    word2vec_vectorizer = Question2VecVectorizer(tokenizer=tokenize)

    # trunk features reduced by latent semantic analysis (LSA)
    lsa = TruncatedSVD(n_components=200, n_iter=10)
    trunk_lsa = Pipeline([
        ('trunk', trunk_vectorizer),
        ('lsa', lsa),
    ])

    # combined features (presumably 400 dimensions in total -- TODO confirm)
    combined = FeatureUnion([
        ('f_trunk_lsa', trunk_lsa),
        ('f_word2vec', word2vec_vectorizer),
    ])

    return Pipeline([
        ('union', combined),
        ('clf', LinearSVC(C=0.02)),
    ])
def grid_retrain_in_f(self, n_dim=500):
    """Retrain the substitute classifier on random Fourier features and benchmark it.

    A pipeline of ``RBFSampler`` followed by ``LinearSVC`` is fitted on
    ``self.X_ex`` / ``self.y_ex``, registered via ``self.set_clf2`` and
    evaluated with ``self.benchmark()``.

    Args:
        n_dim: Number of random Fourier components produced by the sampler.

    Returns:
        Whatever ``self.benchmark()`` returns.
    """
    # Pass n_dim by keyword: RBFSampler's first parameter is ``gamma``, so the
    # positional call set gamma=n_dim and left n_components at its default.
    rbf_map = RBFSampler(n_components=n_dim, random_state=1)
    fourier_approx_svm = pipeline.Pipeline([("mapper", rbf_map),
                                            ("svm", LinearSVC())])

    # Disabled hyper-parameter search, kept for reference:
    # C_range = np.logspace(-5, 15, 21, base=2)
    # gamma_range = np.logspace(-15, 3, 19, base=2)
    # param_grid = dict(mapper__gamma=gamma_range, svm__C=C_range)
    # cv = StratifiedShuffleSplit(Y, n_iter=5, test_size=0.2, random_state=42)
    # grid = GridSearchCV(fourier_approx_svm, param_grid=param_grid, cv=cv)
    # grid.fit(X, Y)
    #
    # rbf_svc2 = grid.best_estimator_

    rbf_svc2 = fourier_approx_svm
    rbf_svc2.fit(self.X_ex, self.y_ex)

    self.set_clf2(rbf_svc2)
    return self.benchmark()
def train(labeled_featuresets, C=1e5):
    """Train a one-vs-rest linear SVM on labelled feature sets.

    :param labeled_featuresets: A list of classified featuresets, i.e., a list
        of tuples ``(featureset, label)``.
    :param C: Regularisation parameter forwarded to ``LinearSVC``.
    :return: A ``scikit_classifier`` bundling the feature vectorizer, the
        label-to-index mapping and the fitted classifier.
    """
    featuresets = [featureset for featureset, _ in labeled_featuresets]
    labels = [label for _, label in labeled_featuresets]

    # vectorize the feature dictionaries, then normalize every sample
    feature_vectorizer = MVectorizer.DictsVectorizer()
    vectors = feature_vectorizer.fit_transform(featuresets)
    vectors = Normalizer().fit_transform(vectors)

    # assign one integer id per distinct label
    label_vectorizer = {label: num for num, label in enumerate(set(labels))}
    targets = numpy.array([label_vectorizer[label] for label in labels])

    base_svm = LinearSVC(loss='squared_hinge', penalty='l2', dual=True,
                         tol=1e-5, C=C)
    classifier = OneVsRestClassifier(base_svm)
    classifier.fit(vectors, targets)

    return scikit_classifier(feature_vectorizer, label_vectorizer, classifier)
def train_svms():
    """Train one LinearSVC per file in ``svm_train/`` on network features.

    The fine-tuned network is loaded from ``models/fine_tune.model``; if its
    index file is missing a message is printed and nothing is trained. Files
    whose name contains ``pkl`` are skipped. The list of fitted classifiers is
    dumped to ``models/train_svm.model``.
    """
    if not os.path.isfile('models/fine_tune.model.index'):
        print('models/fine_tune.model doesn\'t exist.')
        return

    network = create_alexnet()
    extractor = tflearn.DNN(network)
    extractor.load('models/fine_tune.model')

    source_dir = 'svm_train/'
    classifiers = []
    for entry in os.listdir(source_dir):
        if "pkl" not in entry:
            samples, targets = generate_single_svm_train_data(source_dir + entry)

            # one forward pass per sample; keep the first (only) output row
            features = [extractor.predict([sample])[0] for sample in samples]
            print("feature dimension of fitting: {}".format(np.shape(features)))

            classifier = svm.LinearSVC()
            classifier.fit(features, targets)
            classifiers.append(classifier)

    joblib.dump(classifiers, 'models/train_svm.model')
def article_trainers(articles: ArticleDB):
    """
    Train several classifiers on the article db to predict validity scores,
    then train a soft-voting ensemble built from the first four results.
    """
    candidates = [
        (DecisionTreeClassifier, {}),
        (RandomForestClassifier, {}),
        (LogisticRegression, {'C': [0.01, 0.1, 1, 10, 100]}),
        (MultinomialNB, {'alpha': [0.1, 1.0, 10.0, 100.0]}),
        (LinearSVC, {'C': [0.01, 0.1, 1, 10, 100]}),
    ]

    fitted = []
    for estimator_cls, grid in candidates:
        result = train_model(articles, estimator_cls, grid, probabilities=True)
        fitted.append((str(result), result))

    # only the first four results take part in the vote (LinearSVC is left out)
    voters = fitted[:4]
    ensemble_learner = VotingClassifier(estimators=voters, voting='soft')
    train_model(articles, ensemble_learner, {})
def test_decision_function_rocauc(self):
    """
    Test ROCAUC with classifiers that have a decision function
    """
    # LinearSVC must not offer predict_proba for this test to be meaningful.
    estimator = LinearSVC()
    with self.assertRaises(AttributeError):
        estimator.predict_proba

    # Fit model and visualizer
    viz = ROCAUC(estimator)
    viz.fit(X, yb)

    reference_scores = np.asarray([
        0.204348, 0.228593, 0.219908, -0.211756, -0.26155, -0.221405
    ])

    # Scores obtained by the visualizer must match to one decimal place.
    actual_scores = viz._get_y_scores(X)
    npt.assert_array_almost_equal(actual_scores, reference_scores, decimal=1)
def tune_para(dataframe, i):
    """Fit four SVM variants on the 200 rows before ``i`` and predict row ``i``.

    Args:
        dataframe: DataFrame containing the indicator columns listed below and
            an ``'Adj Close'`` target column.
        i: Row position to predict; rows ``i-200 .. i-1`` form the training
            window.
            NOTE(review): for ``i < 200`` the slice start is negative and the
            window is not the preceding 200 rows -- confirm callers guard it.

    Returns:
        Transposed stack holding the true target followed by the predictions
        of the linear-kernel SVC, LinearSVC, RBF SVC and polynomial SVC.
    """
    columns = ['SMA_10', 'Momentum', 'stoch_K', 'WMA_10', 'MACD', 'A/D', 'Volume']

    # ``.values`` replaces ``as_matrix()``, which newer pandas no longer provides.
    X = dataframe[columns].values
    y = dataframe['Adj Close'].values

    X_train = X[i-200:i]
    y_train = y[i-200:i]
    X_test = X[i:i+1]
    y_test = y[i:i+1]

    ### Train four kinds of SVM model
    C = 1  # SVM regularization parameter
    svc = svm.SVC(cache_size = 1000, kernel='linear', C=C).fit(X_train, y_train)
    rbf_svc = svm.SVC(cache_size = 1000, kernel='rbf', gamma=0.7, C=C).fit(X_train, y_train)
    poly_svc = svm.SVC(cache_size = 1000, kernel='poly', degree=3, C=C).fit(X_train, y_train)
    lin_svc = svm.LinearSVC(loss='squared_hinge', penalty='l1', dual=False, C=C).fit(X_train, y_train)

    Y_result = y_test

    ### Make the prediction
    # Iterate the models directly so the loop no longer shadows parameter ``i``.
    for clf in (svc, lin_svc, rbf_svc, poly_svc):
        pred = clf.predict(X_test)
        Y_result = np.vstack((Y_result, np.array(pred)))  # append prediction on Y_result
    return Y_result.T
def SVMbanchmark(X_train, y_train, X_test, y_test):
    """Select C for an L1-penalised one-vs-rest LinearSVC and score the test set.

    Each candidate C is scored by macro F1 on the first 10000 test samples;
    the best C is then refitted and its micro F1 on the full test set printed.
    """
    # optimial c is 10.0, f1 = 0.52
    print("Training LinearSVC with l1-based feature selection")
    # NOTE(review): the validation subset is drawn from the test set itself.
    X_valid, y_valid = X_test[:10000], y_test[:10000]
    score_list = []
    CList = [0.1, 0.5, 1, 10, 50, 100]
    for c in CList:
        clf = OneVsRestClassifier(LinearSVC(C=c, penalty='l1', dual=False))
        clf.fit(X_train, y_train)
        pred = clf.predict(X_valid)
        score = metrics.f1_score(y_valid, pred, average="macro")
        score_list.append(score)
        print("f1-score: {:f}, c is {:f}".format(score, c))

    # The keyword is ``penalty``; the misspelt ``penality`` raised TypeError.
    best_c = CList[np.argmax(score_list)]
    clf = OneVsRestClassifier(LinearSVC(penalty="l1", dual=False, C=best_c))
    clf.fit(X_train, y_train)
    pred = clf.predict(X_test)
    score = metrics.f1_score(y_test, pred, average="micro")
    print("f1-score for test set: {:f}".format(score))
def SVMbanchmark(X_train, X_test, y_train, y_test):
    """Select C for an L1-penalised LinearSVC and score the test set.

    Each candidate C is scored by accuracy on the first 10000 test samples;
    the best C is then refitted and its accuracy on the full test set printed.

    NOTE(review): the printed labels say "f1-score" although the metric
    computed is accuracy; the strings are left unchanged -- confirm which
    metric is intended.
    """
    # optimial c is 10.0, f1 = 0.52
    print("Training LinearSVC with l1-based feature selection")
    # (The leftover pdb.set_trace() debugger breakpoint was removed.)
    X_valid, y_valid = X_test[:10000], y_test[:10000]
    score_list = []
    CList = [0.1, 0.5, 1, 10, 50, 100]
    for c in CList:
        clf = LinearSVC(C=c, penalty='l1', dual=False)
        clf.fit(X_train, y_train)
        pred = clf.predict(X_valid)
        score = metrics.accuracy_score(y_valid, pred)
        score_list.append(score)
        print("f1-score: {:f}, c is {:f}".format(score, c))

    # The keyword is ``penalty``; the misspelt ``penality`` raised TypeError.
    best_c = CList[np.argmax(score_list)]
    clf = LinearSVC(penalty="l1", dual=False, C=best_c)
    clf.fit(X_train, y_train)
    pred = clf.predict(X_test)
    # Score the test predictions; previously the last validation score from
    # the loop was printed here.
    score = metrics.accuracy_score(y_test, pred)
    print("f1-score for test set: {:f}".format(score))
def compareWithSvm(self,datasetTrain,datasetTest): C=[0.000001,0.00001,0.0001,0.001,0.01,0.1,1,10,100,1000] print '\n' print 'dataset shape is ',datasetTrain.shape self.y_train=self.y_train.reshape(-1,) for c in C: self.Svm=svm.LinearSVC(C=c) self.Svm.fit(datasetTrain,self.y_train) labels=self.Svm.predict(datasetTest) print 'accuracy with c=',c,' is ',self.checkAccuracy(labels,self.y_test),'% ','\n' #for graph based reasoning , replace every 0 with -1
def trainClassifier(foldername,classifierName):
    """Train a LinearSVC on HOG features of the PNG images in ``foldername``.

    Each image is read unchanged, reshaped to (27, 35) and described by a HOG
    vector; its label is the first character of the file name. The fitted
    classifier is dumped to ``classifierName`` with compression level 3.

    Side effect: the working directory is changed to ``foldername``, then
    moved up one level before the dump and one more level afterwards.
    NOTE(review): the second ``os.chdir("..")`` leaves the process one level
    above where it would be after a single return -- confirm this is intended.
    """
    # The unused cv2.ml.KNearest_create() model was removed.
    os.chdir(foldername)

    list_hog_fd = []
    labels = []
    # Read, label and describe each image in a single pass.
    for filename in glob.iglob('*.png'):
        image = cv2.imread(filename, -1)
        labels.append(filename[0])
        fd = hog(image.reshape((27, 35)), orientations=9,
                 pixels_per_cell=(9, 7), cells_per_block=(1, 1),
                 visualise=False)
        list_hog_fd.append(fd)

    hog_features = np.array(list_hog_fd, 'float64')
    os.chdir("..")

    clf = LinearSVC()
    clf.fit(hog_features, labels)
    joblib.dump(clf,classifierName, compress=3)
    os.chdir("..")
def test_RFECV():
    '''
    Run RFECV with a LinearSVC on the iris data and print the selector's
    feature count, support mask, ranking and grid scores.

    :return: None
    '''
    iris = load_iris()
    features, target = iris.data, iris.target

    selector = RFECV(estimator=LinearSVC(), cv=3)
    selector.fit(features, target)

    print("N_features %s" % selector.n_features_)
    print("Support is %s" % selector.support_)
    print("Ranking %s" % selector.ranking_)
    print("Grid Scores %s" % selector.grid_scores_)
def compute_cross_fold(data):
    """Cross-validate a LinearSVC that predicts the 'Profitable' column.

    ``data`` is standardised and used as the feature matrix; the target is
    read from the ``Profitable`` column of ``total_set.csv`` in the working
    directory.

    Returns:
        Tuple ``(mean score, 2 * standard deviation)`` over 10 folds.
    """
    table = pd.read_csv("total_set.csv", index_col=0)

    # standardise the features
    standardizer = preprocessing.StandardScaler().fit(data)
    scaled = standardizer.transform(data)

    target = table['Profitable']

    # Earlier experiments, kept for reference:
    # gross_target = data_table['Domestic Gross']
    # tomato = data_table['Rotten']
    # clf_profit = svm.SVC(kernel='rbf',C=0.8, gamma=5,verbose=True)
    classifier = svm.LinearSVC(C=0.001, verbose=True, tol=.1)
    classifier.fit(scaled, target)

    fold_scores = cross_val_score(classifier, scaled, target, cv=10)
    return (fold_scores.mean(), fold_scores.std() * 2)
def _train(self, X_matrix, y, **kwargs):
    """Fit a LinearSVC on the given data.

    Parameters:
        X_matrix (numpy.array): - feature matrix used for training
        y (numpy.array): - target values used for training
        **kwargs: keyword arguments forwarded to the ``LinearSVC`` constructor

    Returns:
        sklearn.model: - the fitted LinearSVC model

    """
    from sklearn.svm import LinearSVC

    classifier = LinearSVC(**kwargs)
    classifier.fit(X_matrix, y)
    return classifier
def test_random_hasher():
    """Random-trees hashing of the circles data must be linearly separable.

    The separability has to hold even after projecting the embedding onto two
    SVD dimensions. Note: not all random_states produce perfect results.
    """
    def new_hasher():
        # identical configuration for both embeddings under comparison
        return RandomTreesEmbedding(n_estimators=30, random_state=1)

    hasher = new_hasher()
    X, y = datasets.make_circles(factor=0.5)
    embedded = hasher.fit_transform(X)

    # fit followed by transform must agree with fit_transform
    hasher = new_hasher()
    refit_embedded = hasher.fit(X).transform(X)
    assert_array_equal(refit_embedded.toarray(), embedded.toarray())

    # one leaf active per data point per forest
    assert_equal(embedded.shape[0], X.shape[0])
    assert_array_equal(embedded.sum(axis=1), hasher.n_estimators)

    reduced = TruncatedSVD(n_components=2).fit_transform(embedded)

    separator = LinearSVC()
    separator.fit(reduced, y)
    assert_equal(separator.score(reduced, y), 1.)