Python sklearn.svm 模块,LinearSVC() 实例源码

我们从 Python 开源项目中提取了以下 50 个代码示例,用于说明如何使用 sklearn.svm.LinearSVC()。

项目:pybot    作者:spillai    | 项目源码 | 文件源码
def __init__(self, filename, target_map, classifier='svm'):
    """Record the run configuration and create the base classifier.

    Args:
        filename: Stored unchanged on ``self.filename_``.
        target_map: Mapping whose keys are the target ids; the sorted unique
            keys are stored as an ``int32`` array on ``self.target_ids_``.
        classifier: ``'svm'`` creates a ``LinearSVC`` on ``self.clf_base_``
            with a grid over ``C``; ``'sgd'`` creates an ``SGDClassifier``
            on ``self.clf_`` with a grid over ``alpha``.

    Raises:
        ValueError: If ``classifier`` is neither ``'svm'`` nor ``'sgd'``.
    """
    self.seed_ = 0
    self.filename_ = filename
    self.target_map_ = target_map
    # Materialize the keys first: on Python 3 ``dict.keys()`` is a view that
    # NumPy wraps as a single object, so the int32 cast would fail.
    self.target_ids_ = np.unique(list(target_map.keys())).astype(np.int32)
    self.epoch_no_ = 0
    self.st_time_ = time.time()

    # Setup classifier
    print('-------------------------------')
    print('====> Building Classifier, setting class weights')
    if classifier == 'svm':
        self.clf_hyparams_ = {'C': [0.01, 0.1, 1.0, 10.0, 100.0],
                              'class_weight': ['balanced']}
        self.clf_base_ = LinearSVC(random_state=self.seed_)
    elif classifier == 'sgd':
        # 'balanced' is the supported spelling of the former 'auto' class
        # weighting and matches the svm branch.
        self.clf_hyparams_ = {'alpha': [0.0001, 0.001, 0.01, 0.1, 1.0, 10.0],
                              'class_weight': ['balanced']}
        # NOTE(review): ``n_iter`` and ``loss='log'`` were renamed in later
        # scikit-learn releases -- confirm against the pinned version.
        self.clf_ = SGDClassifier(loss='log', penalty='l2', shuffle=False, random_state=self.seed_,
                                  warm_start=True, n_jobs=-1, n_iter=1, verbose=4)
    else:
        # Only the two branches above exist, so the message lists just those.
        raise ValueError('Unknown classifier type %s. Choose from [sgd, svm]'
                         % classifier)
项目:adversarial-frcnn    作者:xiaolonw    | 项目源码 | 文件源码
def __init__(self, cls, dim, feature_scale=1.0,
             C=0.001, B=10.0, pos_weight=2.0):
    """Set up the feature caches, the linear SVM and the bookkeeping state.

    Args:
        cls: Class identifier, stored on ``self.cls``.
        dim: Feature dimensionality; the positive and negative caches start
            as empty ``(0, dim)`` float32 arrays.
        feature_scale: Stored on ``self.feature_scale``.
        C: Regularization parameter handed to ``LinearSVC``.
        B: Handed to ``LinearSVC`` as ``intercept_scaling``.
        pos_weight: Class weight of the positive (``+1``) class; the
            negative (``-1``) class keeps weight 1.
    """
    self.pos = np.zeros((0, dim), dtype=np.float32)
    self.neg = np.zeros((0, dim), dtype=np.float32)
    self.B = B
    self.C = C
    self.cls = cls
    self.pos_weight = pos_weight
    self.dim = dim
    self.feature_scale = feature_scale
    # loss='hinge' is the supported name of the loss formerly spelled 'l1';
    # recent scikit-learn releases reject the old alias.  The positive class
    # weight follows ``pos_weight`` (2.0 by default) instead of a literal 2.
    self.svm = svm.LinearSVC(C=C, class_weight={1: pos_weight, -1: 1},
                             intercept_scaling=B, verbose=1,
                             penalty='l2', loss='hinge',
                             random_state=cfg.RNG_SEED, dual=True)
    # Counters, thresholds and the loss history start at their initial values.
    self.pos_cur = 0
    self.num_neg_added = 0
    self.retrain_limit = 2000
    self.evict_thresh = -1.1
    self.loss_history = []
项目:microbiome-summer-school-2017    作者:aldro61    | 项目源码 | 文件源码
def make_classification_example(axis, random_state):
    """Draw a two-class toy dataset and a fitted linear SVM boundary on *axis*.

    Args:
        axis: Matplotlib axes object that receives the scatter plots, the
            boundary line, the axis labels and the legend.
        random_state: Passed to ``make_blobs`` to generate the 100 samples.
    """
    X, y = make_blobs(n_samples=100, n_features=2, centers=2, cluster_std=2.7, random_state=random_state)

    axis.scatter(X[y == 0, 0], X[y == 0, 1], color="red", s=10, label="Disease")
    axis.scatter(X[y == 1, 0], X[y == 1, 1], color="blue", s=10, label="Healthy")

    clf = LinearSVC().fit(X, y)

    # Separating hyperplane w0*x + w1*y + b = 0, rewritten as
    # y = slope * x - b / w1.
    w = clf.coef_[0]
    slope = -w[0] / w[1]
    xx = np.linspace(-5, 7)
    yy = slope * xx - (clf.intercept_[0]) / w[1]

    # 'k-' already means a solid black line, so no separate color keyword.
    axis.plot(xx, yy, 'k-', label="Model")

    # Decorate the axes that were passed in (the original referenced an
    # undefined name ``ax1``).  Booleans are used because the string 'off'
    # is truthy on current matplotlib and would leave the labels visible.
    axis.tick_params(labelbottom=False, labelleft=False)
    axis.set_xlabel("Gene 1")
    axis.set_ylabel("Gene 2")
    axis.legend()
项目:johnson-county-ddj-public    作者:dssg    | 项目源码 | 文件源码
def define_model(self, model, parameters, n_cores=0):
    """Return the estimator registered under *model*, configured with *parameters*.

    Args:
        model: Registry key such as ``'RandomForestClassifier'`` or
            ``'linear.SVC'``.
        parameters: Keyword arguments applied through ``set_params``.
        n_cores: Accepted but not used by this method.

    Raises:
        ConfigError: If *model* is not one of the registered keys.
    """
    registry = dict([
        ('RandomForestClassifier',
         RandomForestClassifier(n_estimators=50, n_jobs=7)),
        ('ExtraTreesClassifier',
         ExtraTreesClassifier(n_estimators=10, n_jobs=7, criterion='entropy')),
        ('AdaBoostClassifier',
         AdaBoostClassifier(DecisionTreeClassifier(max_depth=1), algorithm="SAMME", n_estimators=200)),
        ('LogisticRegression',
         LogisticRegression(penalty='l1', C=1e5)),
        ('svm.SVC',
         svm.SVC(kernel='linear', probability=True, random_state=0)),
        ('GradientBoostingClassifier',
         GradientBoostingClassifier(learning_rate=0.05, subsample=0.5, max_depth=6, n_estimators=10)),
        ('GaussianNB', GaussianNB()),
        ('DecisionTreeClassifier', DecisionTreeClassifier()),
        ('SGDClassifier',
         SGDClassifier(loss="hinge", penalty="l2", n_jobs=7)),
        ('KNeighborsClassifier', KNeighborsClassifier(n_neighbors=3)),
        ('linear.SVC', svm.LinearSVC()),
    ])

    if model not in registry:
        raise ConfigError("Unsupported model {}".format(model))

    estimator = registry[model]
    estimator.set_params(**parameters)
    return estimator
项目:Stock-Market-Prediction    作者:Diptiranjan1    | 项目源码 | 文件源码
def do_ml(ticker):
    """Train a voting ensemble for *ticker* and return its held-out accuracy.

    The feature sets come from ``extract_featuresets``; 25% of the samples
    are held out for scoring.  The accuracy and the distribution of the
    predicted classes are printed before the accuracy is returned.
    """
    X, y, _ = extract_featuresets(ticker)

    split = cross_validation.train_test_split(X, y, test_size=0.25)
    X_train, X_test, y_train, y_test = split

    members = [
        ('lsvc', svm.LinearSVC()),
        ('knn', neighbors.KNeighborsClassifier()),
        ('rfor', RandomForestClassifier()),
    ]
    ensemble = VotingClassifier(members)
    ensemble.fit(X_train, y_train)

    confidence = ensemble.score(X_test, y_test)
    print('accuracy:',confidence)
    predicted = ensemble.predict(X_test)
    print('predicted class counts:',Counter(predicted))
    print()
    print()
    return confidence

# examples of running:
项目:fast-rcnn-distillation    作者:xiaolonw    | 项目源码 | 文件源码
def __init__(self, cls, dim, feature_scale=1.0,
             C=0.001, B=10.0, pos_weight=2.0):
    """Set up the feature caches, the linear SVM and the bookkeeping state.

    Args:
        cls: Class identifier, stored on ``self.cls``.
        dim: Feature dimensionality; the positive and negative caches start
            as empty ``(0, dim)`` float32 arrays.
        feature_scale: Stored on ``self.feature_scale``.
        C: Regularization parameter handed to ``LinearSVC``.
        B: Handed to ``LinearSVC`` as ``intercept_scaling``.
        pos_weight: Class weight of the positive (``+1``) class; the
            negative (``-1``) class keeps weight 1.
    """
    self.pos = np.zeros((0, dim), dtype=np.float32)
    self.neg = np.zeros((0, dim), dtype=np.float32)
    self.B = B
    self.C = C
    self.cls = cls
    self.pos_weight = pos_weight
    self.dim = dim
    self.feature_scale = feature_scale
    # loss='hinge' is the supported name of the loss formerly spelled 'l1';
    # recent scikit-learn releases reject the old alias.  The positive class
    # weight follows ``pos_weight`` (2.0 by default) instead of a literal 2.
    self.svm = svm.LinearSVC(C=C, class_weight={1: pos_weight, -1: 1},
                             intercept_scaling=B, verbose=1,
                             penalty='l2', loss='hinge',
                             random_state=cfg.RNG_SEED, dual=True)
    # Counters, thresholds and the loss history start at their initial values.
    self.pos_cur = 0
    self.num_neg_added = 0
    self.retrain_limit = 2000
    self.evict_thresh = -1.1
    self.loss_history = []
项目:faster-rcnn-resnet    作者:Eniac-Xie    | 项目源码 | 文件源码
def __init__(self, cls, dim, feature_scale=1.0,
             C=0.001, B=10.0, pos_weight=2.0):
    """Set up the feature caches, the linear SVM and the bookkeeping state.

    Args:
        cls: Class identifier, stored on ``self.cls``.
        dim: Feature dimensionality; the positive and negative caches start
            as empty ``(0, dim)`` float32 arrays.
        feature_scale: Stored on ``self.feature_scale``.
        C: Regularization parameter handed to ``LinearSVC``.
        B: Handed to ``LinearSVC`` as ``intercept_scaling``.
        pos_weight: Class weight of the positive (``+1``) class; the
            negative (``-1``) class keeps weight 1.
    """
    self.pos = np.zeros((0, dim), dtype=np.float32)
    self.neg = np.zeros((0, dim), dtype=np.float32)
    self.B = B
    self.C = C
    self.cls = cls
    self.pos_weight = pos_weight
    self.dim = dim
    self.feature_scale = feature_scale
    # loss='hinge' is the supported name of the loss formerly spelled 'l1';
    # recent scikit-learn releases reject the old alias.  The positive class
    # weight follows ``pos_weight`` (2.0 by default) instead of a literal 2.
    self.svm = svm.LinearSVC(C=C, class_weight={1: pos_weight, -1: 1},
                             intercept_scaling=B, verbose=1,
                             penalty='l2', loss='hinge',
                             random_state=cfg.RNG_SEED, dual=True)
    # Counters, thresholds and the loss history start at their initial values.
    self.pos_cur = 0
    self.num_neg_added = 0
    self.retrain_limit = 2000
    self.evict_thresh = -1.1
    self.loss_history = []
项目:py-faster-rcnn-tk1    作者:joeking11829    | 项目源码 | 文件源码
def __init__(self, cls, dim, feature_scale=1.0,
             C=0.001, B=10.0, pos_weight=2.0):
    """Set up the feature caches, the linear SVM and the bookkeeping state.

    Args:
        cls: Class identifier, stored on ``self.cls``.
        dim: Feature dimensionality; the positive and negative caches start
            as empty ``(0, dim)`` float32 arrays.
        feature_scale: Stored on ``self.feature_scale``.
        C: Regularization parameter handed to ``LinearSVC``.
        B: Handed to ``LinearSVC`` as ``intercept_scaling``.
        pos_weight: Class weight of the positive (``+1``) class; the
            negative (``-1``) class keeps weight 1.
    """
    self.pos = np.zeros((0, dim), dtype=np.float32)
    self.neg = np.zeros((0, dim), dtype=np.float32)
    self.B = B
    self.C = C
    self.cls = cls
    self.pos_weight = pos_weight
    self.dim = dim
    self.feature_scale = feature_scale
    # loss='hinge' is the supported name of the loss formerly spelled 'l1';
    # recent scikit-learn releases reject the old alias.  The positive class
    # weight follows ``pos_weight`` (2.0 by default) instead of a literal 2.
    self.svm = svm.LinearSVC(C=C, class_weight={1: pos_weight, -1: 1},
                             intercept_scaling=B, verbose=1,
                             penalty='l2', loss='hinge',
                             random_state=cfg.RNG_SEED, dual=True)
    # Counters, thresholds and the loss history start at their initial values.
    self.pos_cur = 0
    self.num_neg_added = 0
    self.retrain_limit = 2000
    self.evict_thresh = -1.1
    self.loss_history = []
项目:coremltools    作者:gsabran    | 项目源码 | 文件源码
def convert(model, feature_names, target):
    """Build a Core ML model from a fitted scikit-learn LinearSVC.

    Parameters
    ----------
    model: LinearSVC
        A trained LinearSVC model (it must expose ``coef_``).

    feature_names: [str]
        Name of the input columns.

    target: str
        Name of the output column.

    Returns
    -------
    The spec produced by ``_logistic_regression._convert`` wrapped in
    ``_MLModel``.

    Raises
    ------
    RuntimeError
        If scikit-learn is not available.
    """
    if not _HAS_SKLEARN:
        raise RuntimeError('scikit-learn not found. scikit-learn conversion API is disabled.')

    def _has_coefficients(estimator):
        # A fitted linear model carries its weights in ``coef_``.
        return hasattr(estimator, 'coef_')

    _sklearn_util.check_expected_type(model, _LinearSVC)
    _sklearn_util.check_fitted(model, _has_coefficients)

    spec = _logistic_regression._convert(model, feature_names, target)
    return _MLModel(spec)
项目:py-faster-rcnn-resnet-imagenet    作者:tianzhi0549    | 项目源码 | 文件源码
def __init__(self, cls, dim, feature_scale=1.0,
             C=0.001, B=10.0, pos_weight=2.0):
    """Set up the feature caches, the linear SVM and the bookkeeping state.

    Args:
        cls: Class identifier, stored on ``self.cls``.
        dim: Feature dimensionality; the positive and negative caches start
            as empty ``(0, dim)`` float32 arrays.
        feature_scale: Stored on ``self.feature_scale``.
        C: Regularization parameter handed to ``LinearSVC``.
        B: Handed to ``LinearSVC`` as ``intercept_scaling``.
        pos_weight: Class weight of the positive (``+1``) class; the
            negative (``-1``) class keeps weight 1.
    """
    self.pos = np.zeros((0, dim), dtype=np.float32)
    self.neg = np.zeros((0, dim), dtype=np.float32)
    self.B = B
    self.C = C
    self.cls = cls
    self.pos_weight = pos_weight
    self.dim = dim
    self.feature_scale = feature_scale
    # loss='hinge' is the supported name of the loss formerly spelled 'l1';
    # recent scikit-learn releases reject the old alias.  The positive class
    # weight follows ``pos_weight`` (2.0 by default) instead of a literal 2.
    self.svm = svm.LinearSVC(C=C, class_weight={1: pos_weight, -1: 1},
                             intercept_scaling=B, verbose=1,
                             penalty='l2', loss='hinge',
                             random_state=cfg.RNG_SEED, dual=True)
    # Counters, thresholds and the loss history start at their initial values.
    self.pos_cur = 0
    self.num_neg_added = 0
    self.retrain_limit = 2000
    self.evict_thresh = -1.1
    self.loss_history = []
项目:face-py-faster-rcnn    作者:playerkk    | 项目源码 | 文件源码
def __init__(self, cls, dim, feature_scale=1.0,
             C=0.001, B=10.0, pos_weight=2.0):
    """Set up the feature caches, the linear SVM and the bookkeeping state.

    Args:
        cls: Class identifier, stored on ``self.cls``.
        dim: Feature dimensionality; the positive and negative caches start
            as empty ``(0, dim)`` float32 arrays.
        feature_scale: Stored on ``self.feature_scale``.
        C: Regularization parameter handed to ``LinearSVC``.
        B: Handed to ``LinearSVC`` as ``intercept_scaling``.
        pos_weight: Class weight of the positive (``+1``) class; the
            negative (``-1``) class keeps weight 1.
    """
    self.pos = np.zeros((0, dim), dtype=np.float32)
    self.neg = np.zeros((0, dim), dtype=np.float32)
    self.B = B
    self.C = C
    self.cls = cls
    self.pos_weight = pos_weight
    self.dim = dim
    self.feature_scale = feature_scale
    # loss='hinge' is the supported name of the loss formerly spelled 'l1';
    # recent scikit-learn releases reject the old alias.  The positive class
    # weight follows ``pos_weight`` (2.0 by default) instead of a literal 2.
    self.svm = svm.LinearSVC(C=C, class_weight={1: pos_weight, -1: 1},
                             intercept_scaling=B, verbose=1,
                             penalty='l2', loss='hinge',
                             random_state=cfg.RNG_SEED, dual=True)
    # Counters, thresholds and the loss history start at their initial values.
    self.pos_cur = 0
    self.num_neg_added = 0
    self.retrain_limit = 2000
    self.evict_thresh = -1.1
    self.loss_history = []
项目:deep-fashion    作者:zuowang    | 项目源码 | 文件源码
def __init__(self, cls, dim, feature_scale=1.0,
             C=0.001, B=10.0, pos_weight=2.0):
    """Set up the feature caches, the linear SVM and the bookkeeping state.

    Args:
        cls: Class identifier, stored on ``self.cls``.
        dim: Feature dimensionality; the positive and negative caches start
            as empty ``(0, dim)`` float32 arrays.
        feature_scale: Stored on ``self.feature_scale``.
        C: Regularization parameter handed to ``LinearSVC``.
        B: Handed to ``LinearSVC`` as ``intercept_scaling``.
        pos_weight: Class weight of the positive (``+1``) class; the
            negative (``-1``) class keeps weight 1.
    """
    self.pos = np.zeros((0, dim), dtype=np.float32)
    self.neg = np.zeros((0, dim), dtype=np.float32)
    self.B = B
    self.C = C
    self.cls = cls
    self.pos_weight = pos_weight
    self.dim = dim
    self.feature_scale = feature_scale
    # loss='hinge' is the supported name of the loss formerly spelled 'l1';
    # recent scikit-learn releases reject the old alias.  The positive class
    # weight follows ``pos_weight`` (2.0 by default) instead of a literal 2.
    self.svm = svm.LinearSVC(C=C, class_weight={1: pos_weight, -1: 1},
                             intercept_scaling=B, verbose=1,
                             penalty='l2', loss='hinge',
                             random_state=cfg.RNG_SEED, dual=True)
    # Counters, thresholds and the loss history start at their initial values.
    self.pos_cur = 0
    self.num_neg_added = 0
    self.retrain_limit = 2000
    self.evict_thresh = -1.1
    self.loss_history = []
项目:DiscourseSenser    作者:WladimirSidorenko    | 项目源码 | 文件源码
def __init__(self, a_clf=None, a_grid_search=False):
    """Class constructor.

    Initialize the classifier pipeline (``DictVectorizer`` followed by the
    classifier).

    Args:
      a_clf (classifier or None):
        classifier to use or None for the default
        ``LinearSVC(C=DFLT_C, **DFLT_PARAMS)``
      a_grid_search (bool): use grid search for estimating hyper-parameters

    """
    # Compare against None explicitly: truth-testing an estimator calls its
    # ``__len__`` when one is defined (scikit-learn ensembles do), which can
    # raise on an unfitted model or silently discard the caller's classifier.
    if a_clf is None:
        classifier = LinearSVC(C=DFLT_C, **DFLT_PARAMS)
    else:
        classifier = a_clf
    self._gs = a_grid_search
    self._model = Pipeline([("vect", DictVectorizer()),
                            ("clf", classifier)])
项目:RPN    作者:hfut721    | 项目源码 | 文件源码
def __init__(self, cls, dim, feature_scale=1.0,
             C=0.001, B=10.0, pos_weight=2.0):
    """Set up the feature caches, the linear SVM and the bookkeeping state.

    Args:
        cls: Class identifier, stored on ``self.cls``.
        dim: Feature dimensionality; the positive and negative caches start
            as empty ``(0, dim)`` float32 arrays.
        feature_scale: Stored on ``self.feature_scale``.
        C: Regularization parameter handed to ``LinearSVC``.
        B: Handed to ``LinearSVC`` as ``intercept_scaling``.
        pos_weight: Class weight of the positive (``+1``) class; the
            negative (``-1``) class keeps weight 1.
    """
    self.pos = np.zeros((0, dim), dtype=np.float32)
    self.neg = np.zeros((0, dim), dtype=np.float32)
    self.B = B
    self.C = C
    self.cls = cls
    self.pos_weight = pos_weight
    self.dim = dim
    self.feature_scale = feature_scale
    # loss='hinge' is the supported name of the loss formerly spelled 'l1';
    # recent scikit-learn releases reject the old alias.  The positive class
    # weight follows ``pos_weight`` (2.0 by default) instead of a literal 2.
    self.svm = svm.LinearSVC(C=C, class_weight={1: pos_weight, -1: 1},
                             intercept_scaling=B, verbose=1,
                             penalty='l2', loss='hinge',
                             random_state=cfg.RNG_SEED, dual=True)
    # Counters, thresholds and the loss history start at their initial values.
    self.pos_cur = 0
    self.num_neg_added = 0
    self.retrain_limit = 2000
    self.evict_thresh = -1.1
    self.loss_history = []
项目:AirTicketPredicting    作者:junlulocky    | 项目源码 | 文件源码
def __init__(self, isTrain, isOutlierRemoval=0):
    """
    Run the base-class setup, preprocess the data and create the SVM variants.

    Besides a default ``SVC`` on ``self.clf``, four classifiers sharing
    C = 1.0 are created: linear-kernel ``SVC``, RBF-kernel ``SVC``,
    polynomial-kernel ``SVC`` and ``LinearSVC``.

    ``LinearSVC()`` and ``SVC(kernel='linear')`` yield slightly different
    decision boundaries, which can be a consequence of:
    - ``LinearSVC`` minimizing the squared hinge loss while ``SVC``
      minimizes the regular hinge loss;
    - ``LinearSVC`` using the One-vs-All (One-vs-Rest) multiclass reduction
      while ``SVC`` uses the One-vs-One reduction.

    :param isTrain: forwarded to the base-class constructor
    :param isOutlierRemoval: forwarded to the base-class constructor
    """
    super(ClassificationSVM, self).__init__(isTrain, isOutlierRemoval)

    # data preprocessing
    self.dataPreprocessing()

    # default SVM classifier
    self.clf = svm.SVC()

    # one regularization strength shared by the four variants below
    penalty = 1.0
    self.svc = svm.SVC(kernel='linear', C=penalty, max_iter=100000)
    self.rbf_svc = svm.SVC(kernel='rbf', gamma=0.7, C=penalty)
    self.poly_svc = svm.SVC(kernel='poly', coef0=1, degree=3, C=penalty)
    self.lin_svc = svm.LinearSVC(C=penalty)
项目:SentiCR    作者:senticr    | 项目源码 | 文件源码
def get_classifier(self):
    """Instantiate the classifier selected by ``self.algo``.

    Recognized codes are GBT, RF, ADB, DT, NB, SGD, SVC and MLPC.  For any
    other value the integer 0 is returned.
    """
    def _build_mlp():
        return MLPClassifier(activation='logistic',  batch_size='auto',
                             early_stopping=True, hidden_layer_sizes=(100,), learning_rate='adaptive',
                             learning_rate_init=0.1, max_iter=5000, random_state=1,
                             solver='lbfgs', tol=0.0001, validation_fraction=0.1, verbose=False,
                             warm_start=False)

    # Factories are wrapped in lambdas so that only the selected estimator
    # is looked up and constructed.
    factories = (
        ("GBT", lambda: GradientBoostingClassifier()),
        ("RF", lambda: RandomForestClassifier()),
        ("ADB", lambda: AdaBoostClassifier()),
        ("DT", lambda: DecisionTreeClassifier()),
        ("NB", lambda: BernoulliNB()),
        ("SGD", lambda: SGDClassifier()),
        ("SVC", lambda: LinearSVC()),
        ("MLPC", _build_mlp),
    )

    selected = self.algo
    for code, build in factories:
        if selected == code:
            return build()
    return 0
项目:dask-searchcv    作者:dask    | 项目源码 | 文件源码
def test_classes__property():
    """``classes_`` is exposed only by a fitted, refit classifier search."""
    X = np.arange(100).reshape(10, 10)
    y = np.array([0] * 5 + [1] * 5)
    Cs = [.1, 1, 10]

    # Fitted classifier search: classes_ mirrors best_estimator_.classes_
    fitted_search = dcv.GridSearchCV(LinearSVC(random_state=0), {'C': Cs})
    fitted_search.fit(X, y)
    assert_array_equal(fitted_search.best_estimator_.classes_,
                       fitted_search.classes_)

    # Regressor search: no classes_ attribute even after fitting
    ridge_search = dcv.GridSearchCV(Ridge(), {'alpha': [1.0, 2.0]})
    ridge_search.fit(X, y)
    assert not hasattr(ridge_search, 'classes_')

    # Unfitted search: no classes_ attribute yet
    unfitted_search = dcv.GridSearchCV(LinearSVC(random_state=0), {'C': Cs})
    assert not hasattr(unfitted_search, 'classes_')

    # Search fitted with refit=False: no classes_ attribute
    no_refit_search = dcv.GridSearchCV(LinearSVC(random_state=0),
                                       {'C': Cs}, refit=False)
    no_refit_search.fit(X, y)
    assert not hasattr(no_refit_search, 'classes_')
项目:dask-searchcv    作者:dask    | 项目源码 | 文件源码
def test_grid_search_sparse():
    """Grid search should give matching results on dense and sparse input."""
    features, labels = make_classification(n_samples=200, n_features=100, random_state=0)

    def _new_search():
        # Fresh estimator and grid for every run.
        return dcv.GridSearchCV(LinearSVC(), {'C': [0.1, 1.0]})

    # Dense run: fit on the first 180 samples, predict the remaining 20.
    dense_search = _new_search()
    dense_search.fit(features[:180], labels[:180])
    dense_pred = dense_search.predict(features[180:])
    dense_C = dense_search.best_estimator_.C

    # Sparse run: same split, fitted on a COO view of the CSR matrix.
    sparse_features = sp.csr_matrix(features)
    sparse_search = _new_search()
    sparse_search.fit(sparse_features[:180].tocoo(), labels[:180])
    sparse_pred = sparse_search.predict(sparse_features[180:])
    sparse_C = sparse_search.best_estimator_.C

    assert np.mean(dense_pred == sparse_pred) >= .9
    assert dense_C == sparse_C
项目:pumil    作者:levelfour    | 项目源码 | 文件源码
def train_pumil_clf(bags, pidx, uidx, w, NL, learning_phase = False):
  """Train a hinge-loss linear SVM on the Positive Margin Pool and wrap it.

  Returns the wrapped classifier, or a ``(classifier, reliable negative
  bag indices)`` pair when ``learning_phase`` is true.
  """
  # indices of the top-NL reliable negative bags, and the bags themselves
  reliable_neg = reliable_negative_bag_idx(bags, uidx, w, NL)
  neg_bags = [bags[j] for j in reliable_neg]
  # estimate of p(X|Y=-1) via WKDE
  neg_density = weighted_kde(neg_bags, w[reliable_neg])
  # Positive Margin Pool (PMP): instances, labels and confidences
  pmp_x, pmp_y, pmp_conf = form_pmp(bags, w, pidx, reliable_neg, neg_density)
  # scale every PMP instance by its confidence before fitting the SVM
  weighted_x = np.multiply(pmp_x.T, pmp_conf).T
  linear_svm = svm.LinearSVC(loss = 'hinge')
  linear_svm.fit(weighted_x, pmp_y)

  def _score(x):
    return float(linear_svm.decision_function(x))

  wrapped = pumil_clf_wrapper(_score, neg_density, learning_phase)
  return (wrapped, reliable_neg) if learning_phase else wrapped
项目:TattDL    作者:z-harry-sun    | 项目源码 | 文件源码
def __init__(self, cls, dim, feature_scale=1.0,
             C=0.001, B=10.0, pos_weight=2.0):
    """Set up the feature caches, the linear SVM and the bookkeeping state.

    Args:
        cls: Class identifier, stored on ``self.cls``.
        dim: Feature dimensionality; the positive and negative caches start
            as empty ``(0, dim)`` float32 arrays.
        feature_scale: Stored on ``self.feature_scale``.
        C: Regularization parameter handed to ``LinearSVC``.
        B: Handed to ``LinearSVC`` as ``intercept_scaling``.
        pos_weight: Class weight of the positive (``+1``) class; the
            negative (``-1``) class keeps weight 1.
    """
    self.pos = np.zeros((0, dim), dtype=np.float32)
    self.neg = np.zeros((0, dim), dtype=np.float32)
    self.B = B
    self.C = C
    self.cls = cls
    self.pos_weight = pos_weight
    self.dim = dim
    self.feature_scale = feature_scale
    # loss='hinge' is the supported name of the loss formerly spelled 'l1';
    # recent scikit-learn releases reject the old alias.  The positive class
    # weight follows ``pos_weight`` (2.0 by default) instead of a literal 2.
    self.svm = svm.LinearSVC(C=C, class_weight={1: pos_weight, -1: 1},
                             intercept_scaling=B, verbose=1,
                             penalty='l2', loss='hinge',
                             random_state=cfg.RNG_SEED, dual=True)
    # Counters, thresholds and the loss history start at their initial values.
    self.pos_cur = 0
    self.num_neg_added = 0
    self.retrain_limit = 2000
    self.evict_thresh = -1.1
    self.loss_history = []
项目:CRAFT    作者:byangderek    | 项目源码 | 文件源码
def __init__(self, cls, dim, feature_scale=1.0,
             C=0.001, B=10.0, pos_weight=2.0):
    """Set up the feature caches, the linear SVM and the bookkeeping state.

    Args:
        cls: Class identifier, stored on ``self.cls``.
        dim: Feature dimensionality; the positive and negative caches start
            as empty ``(0, dim)`` float32 arrays.
        feature_scale: Stored on ``self.feature_scale``.
        C: Regularization parameter handed to ``LinearSVC``.
        B: Handed to ``LinearSVC`` as ``intercept_scaling``.
        pos_weight: Class weight of the positive (``+1``) class; the
            negative (``-1``) class keeps weight 1.
    """
    self.pos = np.zeros((0, dim), dtype=np.float32)
    self.neg = np.zeros((0, dim), dtype=np.float32)
    self.B = B
    self.C = C
    self.cls = cls
    self.pos_weight = pos_weight
    self.dim = dim
    self.feature_scale = feature_scale
    # loss='hinge' is the supported name of the loss formerly spelled 'l1';
    # recent scikit-learn releases reject the old alias.  The positive class
    # weight follows ``pos_weight`` (2.0 by default) instead of a literal 2.
    self.svm = svm.LinearSVC(C=C, class_weight={1: pos_weight, -1: 1},
                             intercept_scaling=B, verbose=1,
                             penalty='l2', loss='hinge',
                             random_state=cfg.RNG_SEED, dual=True)
    # Counters, thresholds and the loss history start at their initial values.
    self.pos_cur = 0
    self.num_neg_added = 0
    self.retrain_limit = 2000
    self.evict_thresh = -1.1
    self.loss_history = []
项目:CRAFT    作者:byangderek    | 项目源码 | 文件源码
def __init__(self, cls, dim, feature_scale=1.0,
             C=0.001, B=10.0, pos_weight=2.0):
    """Set up the feature caches, the linear SVM and the bookkeeping state.

    Args:
        cls: Class identifier, stored on ``self.cls``.
        dim: Feature dimensionality; the positive and negative caches start
            as empty ``(0, dim)`` float32 arrays.
        feature_scale: Stored on ``self.feature_scale``.
        C: Regularization parameter handed to ``LinearSVC``.
        B: Handed to ``LinearSVC`` as ``intercept_scaling``.
        pos_weight: Class weight of the positive (``+1``) class; the
            negative (``-1``) class keeps weight 1.
    """
    self.pos = np.zeros((0, dim), dtype=np.float32)
    self.neg = np.zeros((0, dim), dtype=np.float32)
    self.B = B
    self.C = C
    self.cls = cls
    self.pos_weight = pos_weight
    self.dim = dim
    self.feature_scale = feature_scale
    # loss='hinge' is the supported name of the loss formerly spelled 'l1';
    # recent scikit-learn releases reject the old alias.  The positive class
    # weight follows ``pos_weight`` (2.0 by default) instead of a literal 2.
    self.svm = svm.LinearSVC(C=C, class_weight={1: pos_weight, -1: 1},
                             intercept_scaling=B, verbose=1,
                             penalty='l2', loss='hinge',
                             random_state=cfg.RNG_SEED, dual=True)
    # Counters, thresholds and the loss history start at their initial values.
    self.pos_cur = 0
    self.num_neg_added = 0
    self.retrain_limit = 2000
    self.evict_thresh = -1.1
    self.loss_history = []
项目:CRAFT    作者:byangderek    | 项目源码 | 文件源码
def __init__(self, cls, dim, feature_scale=1.0,
             C=0.001, B=10.0, pos_weight=2.0):
    """Set up the feature caches, the linear SVM and the bookkeeping state.

    Args:
        cls: Class identifier, stored on ``self.cls``.
        dim: Feature dimensionality; the positive and negative caches start
            as empty ``(0, dim)`` float32 arrays.
        feature_scale: Stored on ``self.feature_scale``.
        C: Regularization parameter handed to ``LinearSVC``.
        B: Handed to ``LinearSVC`` as ``intercept_scaling``.
        pos_weight: Class weight of the positive (``+1``) class; the
            negative (``-1``) class keeps weight 1.
    """
    self.pos = np.zeros((0, dim), dtype=np.float32)
    self.neg = np.zeros((0, dim), dtype=np.float32)
    self.B = B
    self.C = C
    self.cls = cls
    self.pos_weight = pos_weight
    self.dim = dim
    self.feature_scale = feature_scale
    # loss='hinge' is the supported name of the loss formerly spelled 'l1';
    # recent scikit-learn releases reject the old alias.  The positive class
    # weight follows ``pos_weight`` (2.0 by default) instead of a literal 2.
    self.svm = svm.LinearSVC(C=C, class_weight={1: pos_weight, -1: 1},
                             intercept_scaling=B, verbose=1,
                             penalty='l2', loss='hinge',
                             random_state=cfg.RNG_SEED, dual=True)
    # Counters, thresholds and the loss history start at their initial values.
    self.pos_cur = 0
    self.num_neg_added = 0
    self.retrain_limit = 2000
    self.evict_thresh = -1.1
    self.loss_history = []
项目:coremltools    作者:apple    | 项目源码 | 文件源码
def convert(model, feature_names, target):
    """Build a Core ML model from a fitted scikit-learn LinearSVC.

    Parameters
    ----------
    model: LinearSVC
        A trained LinearSVC model (it must expose ``coef_``).

    feature_names: [str]
        Name of the input columns.

    target: str
        Name of the output column.

    Returns
    -------
    The spec produced by ``_logistic_regression._convert`` wrapped in
    ``_MLModel``.

    Raises
    ------
    RuntimeError
        If scikit-learn is not available.
    """
    if not _HAS_SKLEARN:
        raise RuntimeError('scikit-learn not found. scikit-learn conversion API is disabled.')

    def _has_coefficients(estimator):
        # A fitted linear model carries its weights in ``coef_``.
        return hasattr(estimator, 'coef_')

    _sklearn_util.check_expected_type(model, _LinearSVC)
    _sklearn_util.check_fitted(model, _has_coefficients)

    spec = _logistic_regression._convert(model, feature_names, target)
    return _MLModel(spec)
项目:coremltools    作者:apple    | 项目源码 | 文件源码
def _conversion_and_evaluation_helper_for_linear_svc(self, class_labels):
    """Fit LinearSVC under several settings and check the converted model.

    For every keyword-argument set in ``ARGS`` a ``LinearSVC`` is fitted on
    data from ``GlmCassifierTest._generate_random_data(class_labels)``,
    converted with ``convert`` and evaluated with ``evaluate_classifier``
    against the scikit-learn predictions; ``num_errors`` must be 0.
    """
    ARGS = [ {},
             {'C' : .75, 'loss': 'hinge'},
             {'penalty': 'l1', 'dual': False},
             {'tol': 0.001, 'fit_intercept': False},
             {'intercept_scaling': 1.5}
    ]

    x, y = GlmCassifierTest._generate_random_data(class_labels)
    column_names = ['x1', 'x2']
    df = pd.DataFrame(x, columns=column_names)

    for cur_args in ARGS:
        print(class_labels, cur_args)
        cur_model = LinearSVC(**cur_args)
        cur_model.fit(x, y)

        spec = convert(cur_model, input_features=column_names,
                       output_feature_names='target')

        # Reference predictions that the converted model must reproduce.
        df['prediction'] = cur_model.predict(x)

        cur_eval_metics = evaluate_classifier(spec, df, verbose=False)
        # assertEqual instead of the deprecated assertEquals alias, which
        # no longer exists on Python 3.12+.
        self.assertEqual(cur_eval_metics['num_errors'], 0)
项目:semihin    作者:HKUST-KnowComp    | 项目源码 | 文件源码
def svm_experiment(scope_name, X, y):
    """Average LinearSVC accuracy over 50 stored train/test splits.

    For the label proportion ``lp`` taken from the module-level ``lp_cand``,
    the pickled split files under ``data/local/split/<scope_name>/`` are
    loaded, a ``LinearSVC(C=0.01)`` is fitted on the training rows of ``X``
    and ``y`` and its accuracy on the test rows is recorded.

    Returns:
        The mean accuracy over the 50 splits of the first ``lp`` in
        ``lp_cand`` (``None`` if ``lp_cand`` is empty).
    """
    for lp in lp_cand:
        results = []
        for r in range(50):
            split_prefix = ('data/local/split/' + scope_name + '/lb'
                            + str(lp).zfill(3) + '_' + str(r).zfill(3))
            # Pickle streams are bytes, so the files are opened in binary
            # mode; text mode fails on Python 3.
            # NOTE: pickle must only be used on trusted, locally produced files.
            with open(split_prefix + '_train', 'rb') as f:
                trainLabel = pk.load(f)
            with open(split_prefix + '_test', 'rb') as f:
                testLabel = pk.load(f)

            # Index with real lists: Python 3 dict key views are not valid
            # array indices.
            train_idx = list(trainLabel.keys())
            test_idx = list(testLabel.keys())
            XTrain = X[train_idx]
            XTest = X[test_idx]
            yTrain = y[train_idx]
            yTest = y[test_idx]

            # train
            clf = LinearSVC(C=0.01)
            clf.fit(XTrain, yTrain)

            # test
            pred = clf.predict(XTest)
            results.append(sum(pred == yTest) / float(yTest.shape[0]))
        # NOTE(review): this return sits inside the outer loop, so only the
        # first entry of lp_cand is ever evaluated -- confirm this is intended.
        return np.mean(results)
项目:document_classification    作者:scotthlee    | 项目源码 | 文件源码
def fit(self, x, y):
    """Fit a linear SVM on features scaled by the log-count ratio.

    Sets ``self.r`` (log-count ratio), ``self.coef_`` (SVM weights),
    ``self.int_coef_`` (weights interpolated with ``self.beta``) and
    ``self.bias`` (SVM intercept).
    """
    # Binarize the (possibly non-binary) input features.
    binarized = tfidf_to_counts(x)

    # Log-count ratio between the rows labelled 1 and the rows labelled 0.
    positives = binarized[np.where(y == 1)]
    negatives = binarized[np.where(y == 0)]
    self.r = log_count_ratio(positives, negatives)
    scaled = np.multiply(self.r, binarized)

    # Linear SVM trained on the ratio-scaled features; interpolation is
    # applied to the learned weights afterwards.
    linear_model = LinearSVC(C=self.C)
    linear_model.fit(scaled, y)
    self.coef_ = linear_model.coef_
    self.int_coef_ = interpolate(self.coef_, self.beta)
    self.bias = linear_model.intercept_

    # Scores the interpolated model
项目:wende    作者:h404bi    | 项目源码 | 文件源码
def init_model():
    """Build and return the feature-union + LinearSVC pipeline (unfitted)."""
    # Question-trunk vectorizer
    trunk_vectorizer = QuestionTrunkVectorizer(tokenizer=tokenize)

    # Word2Vec-based question vectorizer
    word2vec_vectorizer = Question2VecVectorizer(tokenizer=tokenize)

    # Trunk features reduced to 200 components with truncated SVD (LSA)
    trunk_lsa = Pipeline([
        ('trunk', trunk_vectorizer),
        ('lsa', TruncatedSVD(n_components=200, n_iter=10)),
    ])

    # Combined features.
    # NOTE(review): the original comment mentions 400, presumably the total
    # feature dimension (200 of which come from LSA) -- confirm.
    combined = FeatureUnion([
        ('f_trunk_lsa', trunk_lsa),
        ('f_word2vec', word2vec_vectorizer),
    ])

    return Pipeline([('union', combined), ('clf', LinearSVC(C=0.02))])
项目:faster_rcnn_logo    作者:romyny    | 项目源码 | 文件源码
def __init__(self, cls, dim, feature_scale=1.0,
             C=0.001, B=10.0, pos_weight=2.0):
    """Set up the feature caches, the linear SVM and the bookkeeping state.

    Args:
        cls: Class identifier, stored on ``self.cls``.
        dim: Feature dimensionality; the positive and negative caches start
            as empty ``(0, dim)`` float32 arrays.
        feature_scale: Stored on ``self.feature_scale``.
        C: Regularization parameter handed to ``LinearSVC``.
        B: Handed to ``LinearSVC`` as ``intercept_scaling``.
        pos_weight: Class weight of the positive (``+1``) class; the
            negative (``-1``) class keeps weight 1.
    """
    self.pos = np.zeros((0, dim), dtype=np.float32)
    self.neg = np.zeros((0, dim), dtype=np.float32)
    self.B = B
    self.C = C
    self.cls = cls
    self.pos_weight = pos_weight
    self.dim = dim
    self.feature_scale = feature_scale
    # loss='hinge' is the supported name of the loss formerly spelled 'l1';
    # recent scikit-learn releases reject the old alias.  The positive class
    # weight follows ``pos_weight`` (2.0 by default) instead of a literal 2.
    self.svm = svm.LinearSVC(C=C, class_weight={1: pos_weight, -1: 1},
                             intercept_scaling=B, verbose=1,
                             penalty='l2', loss='hinge',
                             random_state=cfg.RNG_SEED, dual=True)
    # Counters, thresholds and the loss history start at their initial values.
    self.pos_cur = 0
    self.num_neg_added = 0
    self.retrain_limit = 2000
    self.evict_thresh = -1.1
    self.loss_history = []
项目:Steal-ML    作者:ftramer    | 项目源码 | 文件源码
def grid_retrain_in_f(self, n_dim=500):
    """Fit an RBF feature map + LinearSVC pipeline and benchmark it.

    Args:
        n_dim: Number of random Fourier components generated by the
            ``RBFSampler`` feature map.

    Returns:
        The result of ``self.benchmark()`` after the fitted pipeline has
        been registered through ``self.set_clf2``.
    """
    # Pass n_dim by keyword: RBFSampler's first positional parameter is
    # ``gamma``, so a positional n_dim would set the kernel width instead of
    # the number of components (and is rejected outright by newer releases).
    rbf_map = RBFSampler(n_components=n_dim, random_state=1)
    fourier_approx_svm = pipeline.Pipeline([("mapper", rbf_map),
                                            ("svm", LinearSVC())])

    # A grid search over mapper__gamma (2**-15 .. 2**3) and svm__C
    # (2**-5 .. 2**15) with a stratified shuffle split used to live here;
    # it is disabled and the pipeline is fitted with its default parameters.
    rbf_svc2 = fourier_approx_svm
    rbf_svc2.fit(self.X_ex, self.y_ex)

    self.set_clf2(rbf_svc2)
    return self.benchmark()
项目:Faster_RCNN_Training_Toolkit    作者:VerseChow    | 项目源码 | 文件源码
def __init__(self, cls, dim, feature_scale=1.0,
             C=0.001, B=10.0, pos_weight=2.0):
    """Set up the feature caches, the linear SVM and the bookkeeping state.

    Args:
        cls: Class identifier, stored on ``self.cls``.
        dim: Feature dimensionality; the positive and negative caches start
            as empty ``(0, dim)`` float32 arrays.
        feature_scale: Stored on ``self.feature_scale``.
        C: Regularization parameter handed to ``LinearSVC``.
        B: Handed to ``LinearSVC`` as ``intercept_scaling``.
        pos_weight: Class weight of the positive (``+1``) class; the
            negative (``-1``) class keeps weight 1.
    """
    self.pos = np.zeros((0, dim), dtype=np.float32)
    self.neg = np.zeros((0, dim), dtype=np.float32)
    self.B = B
    self.C = C
    self.cls = cls
    self.pos_weight = pos_weight
    self.dim = dim
    self.feature_scale = feature_scale
    # loss='hinge' is the supported name of the loss formerly spelled 'l1';
    # recent scikit-learn releases reject the old alias.  The positive class
    # weight follows ``pos_weight`` (2.0 by default) instead of a literal 2.
    self.svm = svm.LinearSVC(C=C, class_weight={1: pos_weight, -1: 1},
                             intercept_scaling=B, verbose=1,
                             penalty='l2', loss='hinge',
                             random_state=cfg.RNG_SEED, dual=True)
    # Counters, thresholds and the loss history start at their initial values.
    self.pos_cur = 0
    self.num_neg_added = 0
    self.retrain_limit = 2000
    self.evict_thresh = -1.1
    self.loss_history = []
项目:KITTI-detection-OHEM    作者:manutdzou    | 项目源码 | 文件源码
def __init__(self, cls, dim, feature_scale=1.0,
             C=0.001, B=10.0, pos_weight=2.0):
    """Set up the feature caches, the linear SVM and the bookkeeping state.

    Args:
        cls: Class identifier, stored on ``self.cls``.
        dim: Feature dimensionality; the positive and negative caches start
            as empty ``(0, dim)`` float32 arrays.
        feature_scale: Stored on ``self.feature_scale``.
        C: Regularization parameter handed to ``LinearSVC``.
        B: Handed to ``LinearSVC`` as ``intercept_scaling``.
        pos_weight: Class weight of the positive (``+1``) class; the
            negative (``-1``) class keeps weight 1.
    """
    self.pos = np.zeros((0, dim), dtype=np.float32)
    self.neg = np.zeros((0, dim), dtype=np.float32)
    self.B = B
    self.C = C
    self.cls = cls
    self.pos_weight = pos_weight
    self.dim = dim
    self.feature_scale = feature_scale
    # loss='hinge' is the supported name of the loss formerly spelled 'l1';
    # recent scikit-learn releases reject the old alias.  The positive class
    # weight follows ``pos_weight`` (2.0 by default) instead of a literal 2.
    self.svm = svm.LinearSVC(C=C, class_weight={1: pos_weight, -1: 1},
                             intercept_scaling=B, verbose=1,
                             penalty='l2', loss='hinge',
                             random_state=cfg.RNG_SEED, dual=True)
    # Counters, thresholds and the loss history start at their initial values.
    self.pos_cur = 0
    self.num_neg_added = 0
    self.retrain_limit = 2000
    self.evict_thresh = -1.1
    self.loss_history = []
项目:QuestionAnswerNLP    作者:debjyoti385    | 项目源码 | 文件源码
def train(labeled_featuresets, C=1e5):
    """
    Fit a one-vs-rest LinearSVC on labelled feature sets.

    :param labeled_featuresets: A list of classified featuresets,
        i.e., a list of tuples ``(featureset, label)``.
    :param C: regularization parameter passed to ``LinearSVC``.
    :return: a ``scikit_classifier`` built from the fitted feature
        vectorizer, the label-to-index mapping and the fitted classifier.
    """
    featuresets = [fs for fs, _ in labeled_featuresets]
    feature_vectorizer = MVectorizer.DictsVectorizer()
    # Vectorize the feature sets, then normalize the resulting matrix.
    X = Normalizer().fit_transform(feature_vectorizer.fit_transform(featuresets))

    # Assign an integer index to each distinct label.
    distinct_labels = {label for _, label in labeled_featuresets}
    label_vectorizer = {label: index
                        for index, label in enumerate(distinct_labels)}
    y = numpy.array([label_vectorizer[label]
                     for _, label in labeled_featuresets])

    base_svm = LinearSVC(loss='squared_hinge', penalty='l2', dual=True, tol=1e-5, C=C)
    classifier = OneVsRestClassifier(base_svm)
    classifier.fit(X, y)

    return scikit_classifier(feature_vectorizer, label_vectorizer, classifier)
项目:rcnn-with-tflearn    作者:Redoblue    | 项目源码 | 文件源码
def train_svms():
    """Train one LinearSVC per file in ``svm_train/`` on network features.

    The fine-tuned network is loaded from ``models/fine_tune.model``; if its
    index file is missing a message is printed and nothing is trained.  The
    list of fitted SVMs is dumped to ``models/train_svm.model``.
    """
    if not os.path.isfile('models/fine_tune.model.index'):
        print('models/fine_tune.model doesn\'t exist.')
        return

    model = tflearn.DNN(create_alexnet())
    model.load('models/fine_tune.model')

    train_file_dir = 'svm_train/'
    svms = []
    for train_file in os.listdir(train_file_dir):
        # Skip any entry whose name contains "pkl".
        if "pkl" in train_file:
            continue
        X, Y = generate_single_svm_train_data(train_file_dir + train_file)
        # One forward pass per sample; keep the first row of each prediction.
        train_features = [model.predict([sample])[0] for sample in X]
        print("feature dimension of fitting: {}".format(np.shape(train_features)))
        clf = svm.LinearSVC()
        clf.fit(train_features, Y)
        svms.append(clf)
    joblib.dump(svms, 'models/train_svm.model')
项目:fake_news    作者:bmassman    | 项目源码 | 文件源码
def article_trainers(articles: ArticleDB):
    """
    Train several classifiers on the article db, then a soft-voting ensemble.

    Each (classifier, parameter grid) pair is handed to ``train_model`` with
    ``probabilities=True``; the first four results are combined in a
    ``VotingClassifier`` which is trained in turn.
    """
    candidates = [
        (DecisionTreeClassifier, {}),
        (RandomForestClassifier, {}),
        (LogisticRegression, {'C': [0.01, 0.1, 1, 10, 100]}),
        (MultinomialNB, {'alpha': [0.1, 1.0, 10.0, 100.0]}),
        (LinearSVC, {'C': [0.01, 0.1, 1, 10, 100]}),
    ]
    fitted = (train_model(articles, classifier, grid, probabilities=True)
              for classifier, grid in candidates)
    trained_models = [(str(result), result) for result in fitted]

    # Only the first four results are ensembled; the LinearSVC result (last)
    # is left out, presumably because soft voting needs class probabilities.
    ensemble_learner = VotingClassifier(estimators=trained_models[:4],
                                        voting='soft')
    train_model(articles, ensemble_learner, {})
项目:ohem    作者:abhi2610    | 项目源码 | 文件源码
def __init__(self, cls, dim, feature_scale=1.0,
             C=0.001, B=10.0, pos_weight=2.0):
    """Set up the feature caches, the linear SVM and the bookkeeping state.

    Args:
        cls: Class identifier, stored on ``self.cls``.
        dim: Feature dimensionality; the positive and negative caches start
            as empty ``(0, dim)`` float32 arrays.
        feature_scale: Stored on ``self.feature_scale``.
        C: Regularization parameter handed to ``LinearSVC``.
        B: Handed to ``LinearSVC`` as ``intercept_scaling``.
        pos_weight: Class weight of the positive (``+1``) class; the
            negative (``-1``) class keeps weight 1.
    """
    self.pos = np.zeros((0, dim), dtype=np.float32)
    self.neg = np.zeros((0, dim), dtype=np.float32)
    self.B = B
    self.C = C
    self.cls = cls
    self.pos_weight = pos_weight
    self.dim = dim
    self.feature_scale = feature_scale
    # loss='hinge' is the supported name of the loss formerly spelled 'l1';
    # recent scikit-learn releases reject the old alias.  The positive class
    # weight follows ``pos_weight`` (2.0 by default) instead of a literal 2.
    self.svm = svm.LinearSVC(C=C, class_weight={1: pos_weight, -1: 1},
                             intercept_scaling=B, verbose=1,
                             penalty='l2', loss='hinge',
                             random_state=cfg.RNG_SEED, dual=True)
    # Counters, thresholds and the loss history start at their initial values.
    self.pos_cur = 0
    self.num_neg_added = 0
    self.retrain_limit = 2000
    self.evict_thresh = -1.1
    self.loss_history = []
项目:py-faster-rcnn-dockerface    作者:natanielruiz    | 项目源码 | 文件源码
def __init__(self, cls, dim, feature_scale=1.0,
             C=0.001, B=10.0, pos_weight=2.0):
    """Set up the feature caches, the linear SVM and the bookkeeping state.

    Args:
        cls: Class identifier, stored on ``self.cls``.
        dim: Feature dimensionality; the positive and negative caches start
            as empty ``(0, dim)`` float32 arrays.
        feature_scale: Stored on ``self.feature_scale``.
        C: Regularization parameter handed to ``LinearSVC``.
        B: Handed to ``LinearSVC`` as ``intercept_scaling``.
        pos_weight: Class weight of the positive (``+1``) class; the
            negative (``-1``) class keeps weight 1.
    """
    self.pos = np.zeros((0, dim), dtype=np.float32)
    self.neg = np.zeros((0, dim), dtype=np.float32)
    self.B = B
    self.C = C
    self.cls = cls
    self.pos_weight = pos_weight
    self.dim = dim
    self.feature_scale = feature_scale
    # loss='hinge' is the supported name of the loss formerly spelled 'l1';
    # recent scikit-learn releases reject the old alias.  The positive class
    # weight follows ``pos_weight`` (2.0 by default) instead of a literal 2.
    self.svm = svm.LinearSVC(C=C, class_weight={1: pos_weight, -1: 1},
                             intercept_scaling=B, verbose=1,
                             penalty='l2', loss='hinge',
                             random_state=cfg.RNG_SEED, dual=True)
    # Counters, thresholds and the loss history start at their initial values.
    self.pos_cur = 0
    self.num_neg_added = 0
    self.retrain_limit = 2000
    self.evict_thresh = -1.1
    self.loss_history = []
项目:yellowbrick    作者:DistrictDataLabs    | 项目源码 | 文件源码
def test_decision_function_rocauc(self):
        """
        Check the scores ROCAUC produces for a classifier that offers a
        decision function but no ``predict_proba``.
        """
        # LinearSVC must not expose predict_proba; accessing it has to raise.
        classifier = LinearSVC()
        with self.assertRaises(AttributeError):
            getattr(classifier, "predict_proba")

        # Wrap the unfitted classifier and fit through the visualizer.
        oz = ROCAUC(classifier)
        oz.fit(X, yb)

        # Scores the visualizer derives for X (not probabilities here).
        scores = oz._get_y_scores(X)

        # Reference values, compared to one decimal place only.
        reference = np.asarray([
            0.204348,  0.228593,  0.219908, -0.211756, -0.26155 , -0.221405
        ])
        npt.assert_array_almost_equal(scores, reference, decimal=1)
项目:svm    作者:fzn0728    | 项目源码 | 文件源码
def tune_para(dataframe, i):
    """Fit four SVM variants on the rows before ``i`` and predict row ``i``.

    Parameters:
        dataframe: pandas DataFrame holding the indicator columns listed in
            ``columns`` below plus an ``'Adj Close'`` target column.
        i: positional index of the row to predict; up to 200 rows directly
            before it form the training window.

    Returns:
        2-D numpy array with one row per test sample (a single row here).
        Its columns are, in order: the actual target, then the predictions
        of the linear-kernel SVC, LinearSVC, RBF SVC and polynomial SVC.
    """
    columns = ['SMA_10','Momentum','stoch_K','WMA_10','MACD','A/D','Volume']
    # ``.values`` replaces ``as_matrix()``, which was removed in pandas 1.0.
    X = dataframe[columns].values
    y = dataframe['Adj Close'].values
    # Clamp the window start: for i < 200 a negative start would wrap around
    # to the end of the array and produce an empty or wrong training slice.
    start = max(i - 200, 0)
    X_train = X[start:i]
    y_train = y[start:i]
    X_test = X[i:i+1]
    y_test = y[i:i+1]

    ### Train four kinds of SVM model
    C = 1  # SVM regularization parameter
    svc = svm.SVC(cache_size = 1000, kernel='linear', C=C).fit(X_train, y_train)
    rbf_svc = svm.SVC(cache_size = 1000, kernel='rbf', gamma=0.7, C=C).fit(X_train, y_train)
    poly_svc = svm.SVC(cache_size = 1000, kernel='poly', degree=3, C=C).fit(X_train, y_train)
    lin_svc = svm.LinearSVC(loss='squared_hinge', penalty='l1', dual=False, C=C).fit(X_train, y_train)
    Y_result = y_test

    ### Make the prediction
    # Iterate the models directly; the old loop index shadowed parameter ``i``.
    for clf in (svc, lin_svc, rbf_svc, poly_svc):
        pred = clf.predict(X_test)
        Y_result = np.vstack((Y_result, np.array(pred))) # append prediction on Y_result
    return Y_result.T
项目:NVDM-For-Document-Classification    作者:cryanzpj    | 项目源码 | 文件源码
def SVMbanchmark(X_train, y_train, X_test, y_test):
    """Tune C for a one-vs-rest L1 LinearSVC and report the test F1 score.

    Each candidate C is scored with macro-averaged F1 on the first 10000
    test samples. The best C is then used to retrain on the training set,
    and the micro-averaged F1 on the full test set is printed.

    Parameters:
        X_train, y_train: training features and labels.
        X_test, y_test: test features and labels.

    Returns:
        None; results are printed.
    """
    # Previously observed: optimal C is 10.0, f1 = 0.52.
    print("Training LinearSVC with l1-based feature selection")
    # NOTE(review): the validation split is a prefix of the test set, so the
    # reported test score is optimistic with respect to the choice of C.
    X_valid, y_valid = X_test[:10000], y_test[:10000]
    score_list = []
    CList = [0.1, 0.5, 1, 10, 50, 100]
    for c in CList:
        clf = OneVsRestClassifier(LinearSVC(C=c, penalty='l1', dual=False))
        clf.fit(X_train, y_train)

        pred = clf.predict(X_valid)
        score = metrics.f1_score(y_valid, pred, average="macro")
        score_list.append(score)
        print("f1-score: {:f}, c is {:f}".format(score, c))
    best_c = CList[int(np.argmax(score_list))]
    # ``penalty`` was misspelled ``penality``, which made LinearSVC raise
    # TypeError before the final model could be trained.
    clf = OneVsRestClassifier(LinearSVC(penalty="l1", dual=False, C=best_c))
    clf.fit(X_train, y_train)
    pred = clf.predict(X_test)
    score = metrics.f1_score(y_test, pred, average="micro")
    print("f1-score for test set: {:f}".format(score))
项目:NVDM-For-Document-Classification    作者:cryanzpj    | 项目源码 | 文件源码
def SVMbanchmark(X_train, X_test, y_train, y_test):
    """Tune C for an L1 LinearSVC by accuracy and report test accuracy.

    Each candidate C is scored with accuracy on the first 10000 test
    samples. The best C is then used to retrain on the training set, and
    the accuracy on the full test set is printed.

    Parameters:
        X_train, X_test: training and test features (note the argument
            order: both feature sets come before the labels).
        y_train, y_test: training and test labels.

    Returns:
        None; results are printed.
    """
    print("Training LinearSVC with l1-based feature selection")
    # The leftover ``pdb.set_trace()`` breakpoint was removed; it halted
    # every non-interactive run.
    # NOTE(review): the validation split is a prefix of the test set, so the
    # reported test score is optimistic with respect to the choice of C.
    X_valid, y_valid = X_test[:10000], y_test[:10000]
    score_list = []
    CList = [0.1, 0.5, 1, 10, 50, 100]
    for c in CList:
        clf = LinearSVC(C=c, penalty='l1', dual=False)
        clf.fit(X_train, y_train)
        pred = clf.predict(X_valid)
        score = metrics.accuracy_score(y_valid, pred)
        score_list.append(score)
        # The metric is accuracy; the message used to call it "f1-score".
        print("accuracy: {:f}, c is {:f}".format(score, c))
    best_c = CList[int(np.argmax(score_list))]
    # ``penalty`` was misspelled ``penality``, which made LinearSVC raise
    # TypeError before the final model could be trained.
    clf = LinearSVC(penalty="l1", dual=False, C=best_c)
    clf.fit(X_train, y_train)
    pred = clf.predict(X_test)
    # Score the test predictions; the old code re-printed the validation
    # score left over from the last loop iteration.
    test_score = metrics.accuracy_score(y_test, pred)
    print("accuracy for test set: {:f}".format(test_score))
项目:PVANet-FACE    作者:twmht    | 项目源码 | 文件源码
def __init__(self, cls, dim, feature_scale=1.0,
                 C=0.001, B=10.0, pos_weight=2.0):
        """Create empty feature caches and the linear SVM for one class.

        Parameters:
            cls: class identifier; stored unchanged on the instance.
            dim: feature dimensionality; sets the column count of the
                positive/negative feature caches.
            feature_scale: stored unchanged; not used by the constructor.
            C: regularization parameter forwarded to ``LinearSVC``.
            B: forwarded to ``LinearSVC`` as ``intercept_scaling``.
            pos_weight: class weight given to the positive (+1) label; the
                negative (-1) label always has weight 1.
        """
        # Both caches start with zero rows and ``dim`` float32 columns.
        self.pos = np.zeros((0, dim), dtype=np.float32)
        self.neg = np.zeros((0, dim), dtype=np.float32)
        self.B = B
        self.C = C
        self.cls = cls
        self.pos_weight = pos_weight
        self.dim = dim
        self.feature_scale = feature_scale
        # The positive class weight used to be hard-coded to 2, silently
        # ignoring ``pos_weight``; the default (2.0) keeps the old behavior.
        # ``loss='hinge'`` is the current spelling of the deprecated alias
        # ``loss='l1'``, which recent scikit-learn releases reject.
        self.svm = svm.LinearSVC(C=C, class_weight={1: pos_weight, -1: 1},
                                 intercept_scaling=B, verbose=1,
                                 penalty='l2', loss='hinge',
                                 random_state=cfg.RNG_SEED, dual=True)
        # Bookkeeping state; the constructor only initializes it.
        # NOTE(review): presumably consumed by hard-negative mining /
        # retraining logic elsewhere in the class -- confirm against usage.
        self.pos_cur = 0
        self.num_neg_added = 0
        self.retrain_limit = 2000
        self.evict_thresh = -1.1
        self.loss_history = []
项目:graph-based-semi-supervised-learning    作者:deerishi    | 项目源码 | 文件源码
def compareWithSvm(self,datasetTrain,datasetTest):
        C=[0.000001,0.00001,0.0001,0.001,0.01,0.1,1,10,100,1000]
        print '\n'
        print 'dataset shape is ',datasetTrain.shape
        self.y_train=self.y_train.reshape(-1,)
        for c in C:
            self.Svm=svm.LinearSVC(C=c)
            self.Svm.fit(datasetTrain,self.y_train)
            labels=self.Svm.predict(datasetTest)
            print 'accuracy with c=',c,'  is  ',self.checkAccuracy(labels,self.y_test),'% ','\n'   











# For graph-based reasoning, replace every 0 with -1.
项目:SBB4-damage-tracker    作者:whorn    | 项目源码 | 文件源码
def trainClassifier(foldername,classifierName):
    """Train a LinearSVC on HOG features of the PNG images in a folder.

    Each ``*.png`` file in ``foldername`` is one sample; its label is the
    first character of the file name. The fitted classifier is written to
    ``classifierName`` with joblib.

    Parameters:
        foldername: directory containing the training images.
        classifierName: path the fitted classifier is dumped to, resolved
            after changing one directory level up from ``foldername``.

    Returns:
        None.

    Side effects:
        Changes the process working directory: into ``foldername``, then up
        one level before training, then up one more level after the dump.
    """
    # The unused ``cv2.ml.KNearest_create()`` call was dropped; it only
    # added a hard dependency on the ``cv2.ml`` submodule.
    features = []
    labels = []
    os.chdir(foldername)
    for filename in glob.iglob('*.png'):
        # Flag -1 loads the image unchanged (no color conversion).
        features.append(cv2.imread(filename, -1))
        labels.append(filename[0])
    # ``visualise=False`` was removed: it is the default, and newer
    # scikit-image releases no longer accept that keyword spelling.
    # NOTE(review): reshape assumes every image holds exactly 27*35 values.
    list_hog_fd = [
        hog(feature.reshape((27, 35)), orientations=9,
            pixels_per_cell=(9, 7), cells_per_block=(1, 1))
        for feature in features
    ]
    hog_features = np.array(list_hog_fd, 'float64')
    os.chdir("..")
    clf = LinearSVC()
    clf.fit(hog_features, labels)
    joblib.dump(clf,classifierName, compress=3)
    # NOTE(review): this second step up leaves the process one level above
    # where the dump was written; kept because callers may rely on it.
    os.chdir("..")
项目:CAPTCHA_Identifier    作者:AlphaLFC    | 项目源码 | 文件源码
def __init__(self, cls, dim, feature_scale=1.0,
                 C=0.001, B=10.0, pos_weight=2.0):
        """Create empty feature caches and the linear SVM for one class.

        Parameters:
            cls: class identifier; stored unchanged on the instance.
            dim: feature dimensionality; sets the column count of the
                positive/negative feature caches.
            feature_scale: stored unchanged; not used by the constructor.
            C: regularization parameter forwarded to ``LinearSVC``.
            B: forwarded to ``LinearSVC`` as ``intercept_scaling``.
            pos_weight: class weight given to the positive (+1) label; the
                negative (-1) label always has weight 1.
        """
        # Both caches start with zero rows and ``dim`` float32 columns.
        self.pos = np.zeros((0, dim), dtype=np.float32)
        self.neg = np.zeros((0, dim), dtype=np.float32)
        self.B = B
        self.C = C
        self.cls = cls
        self.pos_weight = pos_weight
        self.dim = dim
        self.feature_scale = feature_scale
        # The positive class weight used to be hard-coded to 2, silently
        # ignoring ``pos_weight``; the default (2.0) keeps the old behavior.
        # ``loss='hinge'`` is the current spelling of the deprecated alias
        # ``loss='l1'``, which recent scikit-learn releases reject.
        self.svm = svm.LinearSVC(C=C, class_weight={1: pos_weight, -1: 1},
                                 intercept_scaling=B, verbose=1,
                                 penalty='l2', loss='hinge',
                                 random_state=cfg.RNG_SEED, dual=True)
        # Bookkeeping state; the constructor only initializes it.
        # NOTE(review): presumably consumed by hard-negative mining /
        # retraining logic elsewhere in the class -- confirm against usage.
        self.pos_cur = 0
        self.num_neg_added = 0
        self.retrain_limit = 2000
        self.evict_thresh = -1.1
        self.loss_history = []
项目:SubCNN    作者:tanshen    | 项目源码 | 文件源码
def __init__(self, cls, dim, feature_scale=1.0,
                 C=0.001, B=10.0, pos_weight=2.0):
        """Create empty feature caches and the linear SVM for one class.

        Parameters:
            cls: class identifier; stored unchanged on the instance.
            dim: feature dimensionality; sets the column count of the
                positive/negative feature caches.
            feature_scale: stored unchanged; not used by the constructor.
            C: regularization parameter forwarded to ``LinearSVC``.
            B: forwarded to ``LinearSVC`` as ``intercept_scaling``.
            pos_weight: class weight given to the positive (+1) label; the
                negative (-1) label always has weight 1.
        """
        # Both caches start with zero rows and ``dim`` float32 columns.
        self.pos = np.zeros((0, dim), dtype=np.float32)
        self.neg = np.zeros((0, dim), dtype=np.float32)
        self.B = B
        self.C = C
        self.cls = cls
        self.pos_weight = pos_weight
        self.dim = dim
        self.feature_scale = feature_scale
        # The positive class weight used to be hard-coded to 2, silently
        # ignoring ``pos_weight``; the default (2.0) keeps the old behavior.
        # ``loss='hinge'`` is the current spelling of the deprecated alias
        # ``loss='l1'``, which recent scikit-learn releases reject.
        self.svm = svm.LinearSVC(C=C, class_weight={1: pos_weight, -1: 1},
                                 intercept_scaling=B, verbose=1,
                                 penalty='l2', loss='hinge',
                                 random_state=cfg.RNG_SEED, dual=True)
        # Bookkeeping state; the constructor only initializes it.
        # NOTE(review): presumably consumed by hard-negative mining /
        # retraining logic elsewhere in the class -- confirm against usage.
        self.pos_cur = 0
        self.num_neg_added = 0
        self.retrain_limit = 2000
        self.evict_thresh = -1.1
        self.loss_history = []
项目:ML-note    作者:JasonK93    | 项目源码 | 文件源码
def test_RFECV():
    '''
    Run RFECV with a LinearSVC estimator on the iris data set and print
    the selection results.

    Prints the number of selected features, the support mask, the feature
    ranking and the cross-validation scores.

    :return:  None
    '''
    iris=load_iris()
    X=iris.data
    y=iris.target
    estimator=LinearSVC()
    selector=RFECV(estimator=estimator,cv=3)
    selector.fit(X,y)
    print("N_features %s"%selector.n_features_)
    print("Support is %s"%selector.support_)
    print("Ranking %s"%selector.ranking_)
    # ``grid_scores_`` was deprecated in scikit-learn 1.0 and later removed;
    # prefer ``cv_results_`` when present and fall back on older releases.
    if hasattr(selector,"cv_results_"):
        grid_scores=selector.cv_results_["mean_test_score"]
    else:
        grid_scores=selector.grid_scores_
    print("Grid Scores %s"%grid_scores)
项目:movie-quality-profitability-predictor    作者:wbowditch    | 项目源码 | 文件源码
def compute_cross_fold(data):
    """Cross-validate a LinearSVC that predicts the 'Profitable' column.

    ``data`` is standardized with a StandardScaler fitted on itself; the
    targets are read from the 'Profitable' column of ``total_set.csv`` in
    the current working directory.

    Returns:
        Tuple ``(mean, 2 * std)`` of the 10-fold cross-validation scores.
    """
    table = pd.read_csv("total_set.csv", index_col=0)

    # Standardize the features using statistics of ``data`` itself.
    features = preprocessing.StandardScaler().fit(data).transform(data)
    targets = table['Profitable']

    classifier = svm.LinearSVC(C=0.001, verbose=True, tol=.1)
    # Fit once on the full set (emits verbose solver output) before the
    # cross-validation pass.
    classifier.fit(features, targets)
    fold_scores = cross_val_score(classifier, features, targets, cv=10)

    mean_score = fold_scores.mean()
    spread = fold_scores.std() * 2
    return (mean_score, spread)
项目:ScoreCardModel    作者:data-science-tools    | 项目源码 | 文件源码
def _train(self, X_matrix, y, **kwargs):
        """Fit a LinearSVC model and return it.

        Parameters:

            X_matrix (numpy.array): - training feature matrix
            y (numpy.array): - training labels
            **kwargs: - forwarded unchanged to the LinearSVC constructor

        Returns:

            sklearn.model: - the fitted LinearSVC instance


        """
        # Imported locally, as in the original, so scikit-learn is only
        # required when training is actually requested.
        from sklearn.svm import LinearSVC
        classifier = LinearSVC(**kwargs)
        # fit() returns the estimator itself.
        return classifier.fit(X_matrix, y)
项目:Parallel-SGD    作者:angadgill    | 项目源码 | 文件源码
def test_random_hasher():
    """Random-forest hashing of the circles data set.

    Checks that the embedding is reproducible, has one active leaf per
    sample per tree, and stays linearly separable after projection onto
    two SVD components. Not every random_state gives perfect results.
    """
    embedder = RandomTreesEmbedding(n_estimators=30, random_state=1)
    X, y = datasets.make_circles(factor=0.5)
    embedded = embedder.fit_transform(X)

    # fit followed by transform must match fit_transform for the same seed
    embedder = RandomTreesEmbedding(n_estimators=30, random_state=1)
    refit = embedder.fit(X).transform(X)
    assert_array_equal(refit.toarray(), embedded.toarray())

    # one row per sample, and each row sums to the number of trees
    n_embedded_rows, n_samples = embedded.shape[0], X.shape[0]
    assert_equal(n_embedded_rows, n_samples)
    assert_array_equal(embedded.sum(axis=1), embedder.n_estimators)

    # a linear classifier must separate the 2-D projection perfectly
    reduced = TruncatedSVD(n_components=2).fit_transform(embedded)
    separator = LinearSVC()
    separator.fit(reduced, y)
    assert_equal(separator.score(reduced, y), 1.)