Python sklearn.svm 模块,SVC 实例源码

我们从 Python 开源项目中,提取了以下 50 个代码示例,用于说明如何使用 sklearn.svm.SVC。

项目:triage    作者:dssg    | 项目源码 | 文件源码
def trained_models():
    """Train a small zoo of classifiers on the breast-cancer dataset.

    Returns:
        dict: short model name -> fitted estimator.
    """
    dataset = datasets.load_breast_cancer()

    # Fixed seed keeps the 70/30 split reproducible across runs.
    X_train, X_test, y_train, y_test = train_test_split(
        dataset.data, dataset.target, test_size=0.3, random_state=12345)

    def _fit(estimator):
        # sklearn's fit() returns the estimator itself.
        return estimator.fit(X_train, y_train)

    rf = _fit(RandomForestClassifier())
    lr = _fit(LogisticRegression())
    svc_w_linear_kernel = _fit(SVC(kernel='linear'))
    svc_wo_linear_kernel = _fit(SVC())
    dummy = _fit(DummyClassifier())

    return {'RF': rf, 'LR': lr, 'SVC_w_linear_kernel': svc_w_linear_kernel,
            'Dummy': dummy, 'SVC_wo_linear_kernel': svc_wo_linear_kernel}
项目:sef    作者:passalis    | 项目源码 | 文件源码
def evaluate_svm(train_data, train_labels, test_data, test_labels, n_jobs=-1):
    """
    Evaluates a representation using a Linear SVM.
    It uses 3-fold cross validation for selecting the C parameter.
    :param train_data: training feature matrix
    :param train_labels: training labels
    :param test_data: test feature matrix
    :param test_labels: test labels
    :param n_jobs: number of parallel jobs for the grid search (-1 = all cores)
    :return: the test accuracy
    """
    # BUG FIX: sklearn.grid_search was deprecated in 0.18 and removed in
    # 0.20; GridSearchCV now lives in sklearn.model_selection. Fall back
    # to the old module so older environments keep working.
    try:
        from sklearn.model_selection import GridSearchCV
    except ImportError:  # scikit-learn < 0.18
        from sklearn.grid_search import GridSearchCV

    # Scale each feature to [0, 1] using training-set statistics only.
    scaler = MinMaxScaler()
    train_data = scaler.fit_transform(train_data)
    test_data = scaler.transform(test_data)

    parameters = {'kernel': ['linear'], 'C': [0.0001, 0.001, 0.01, 0.1, 1, 10, 100, 1000, 10000, 100000]}
    model = svm.SVC(max_iter=10000)
    clf = GridSearchCV(model, parameters, n_jobs=n_jobs, cv=3)
    clf.fit(train_data, train_labels)
    lin_svm_test = clf.score(test_data, test_labels)
    return lin_svm_test
项目:johnson-county-ddj-public    作者:dssg    | 项目源码 | 文件源码
def get_feature_importance(self,clf, model_name ):
    """Return per-feature importance scores for a fitted classifier.

    Looks up, by class name, which attribute exposes the importances
    ('feature_importances_' for tree ensembles, 'coef_' for linear
    models) and returns it as a list; returns None for models that
    expose neither (e.g. naive Bayes, k-NN).
    """
    importance_attr = {
        'RandomForestClassifier': 'feature_importances',
        'ExtraTreesClassifier': 'feature_importances',
        'AdaBoostClassifier': 'feature_importances',
        'LogisticRegression': 'coef',
        'svm.SVC': 'coef',
        'GradientBoostingClassifier': 'feature_importances',
        'GaussianNB': None,
        'DecisionTreeClassifier': 'feature_importances',
        'SGDClassifier': 'coef',
        'KNeighborsClassifier': None,
        'linear.SVC': 'coef',
    }

    kind = importance_attr[model_name]
    if kind == 'feature_importances':
        return list(clf.feature_importances_)
    if kind == 'coef':
        return list(clf.coef_.tolist())
    return None
项目:rltk    作者:usc-isi-i2    | 项目源码 | 文件源码
def get_classifier_class(class_name):
    """Resolve a short classifier key (e.g. 'svm') to its sklearn class.

    Raises:
        ValueError: if class_name is not a known key.
    """
    registry = {
        'svm': SVC,
        'k_neighbors': KNeighborsClassifier,
        'gaussian_process': GaussianProcessClassifier,
        'decision_tree': DecisionTreeClassifier,
        'random_forest': RandomForestClassifier,
        'ada_boost': AdaBoostClassifier,
        'mlp': MLPClassifier,
        'gaussian_naive_bayes': GaussianNB,
        'quadratic_discriminant_analysis': QuadraticDiscriminantAnalysis
    }

    if class_name in registry:
        return registry[class_name]
    raise ValueError('No such classifier')
项目:johnson-county-ddj-public    作者:dssg    | 项目源码 | 文件源码
def define_model(self, model, parameters, n_cores = 0):
        """Instantiate the named classifier and apply user-supplied parameters.

        Raises:
            ConfigError: if ``model`` is not one of the supported names.
        """
        prototypes = {'RandomForestClassifier': RandomForestClassifier(n_estimators=50, n_jobs=7),
                      'ExtraTreesClassifier': ExtraTreesClassifier(n_estimators=10, n_jobs=7, criterion='entropy'),
                      'AdaBoostClassifier': AdaBoostClassifier(DecisionTreeClassifier(max_depth=1), algorithm="SAMME", n_estimators=200),
                      'LogisticRegression': LogisticRegression(penalty='l1', C=1e5),
                      'svm.SVC': svm.SVC(kernel='linear', probability=True, random_state=0),
                      'GradientBoostingClassifier': GradientBoostingClassifier(learning_rate=0.05, subsample=0.5, max_depth=6, n_estimators=10),
                      'GaussianNB': GaussianNB(),
                      'DecisionTreeClassifier': DecisionTreeClassifier(),
                      'SGDClassifier': SGDClassifier(loss="hinge", penalty="l2", n_jobs=7),
                      'KNeighborsClassifier': KNeighborsClassifier(n_neighbors=3),
                      'linear.SVC': svm.LinearSVC()}

        if model not in prototypes:
            raise ConfigError("Unsupported model {}".format(model))

        # Overlay caller-specified hyper-parameters on the prototype.
        chosen = prototypes[model]
        chosen.set_params(**parameters)
        return chosen
项目:easyML    作者:aarshayj    | 项目源码 | 文件源码
def __init__(
        self,data_block, predictors=None,cv_folds=10,
        scoring_metric='accuracy',additional_display_metrics=None):
        """Build an SVC-backed classification wrapper.

        BUG FIX: the previous signature used mutable default arguments
        (``predictors=[]``, ``additional_display_metrics=[]``), which are
        shared across all calls and can leak state between instances.
        ``None`` is used as a sentinel instead; omitting the arguments
        behaves exactly as before.
        """
        if predictors is None:
            predictors = []
        if additional_display_metrics is None:
            additional_display_metrics = []

        base_classification.__init__(
            self, alg=SVC(), data_block=data_block, predictors=predictors,
            cv_folds=cv_folds,scoring_metric=scoring_metric,
            additional_display_metrics=additional_display_metrics
            )

        self.model_output=pd.Series(self.default_parameters)
        self.model_output['Coefficients'] = "-"

        #Set parameters to default values:
        self.set_parameters(set_default=True)

        #Check if probabilities enabled (SVC must be constructed with
        #probability=True for predict_proba to be available):
        if not self.alg.get_params()['probability']:
            self.probabilities_available = False
项目:ML_NTU    作者:LeoTsui    | 项目源码 | 文件源码
def quiz15():
    """Plot ||w|| of a linear SVM (class 0 vs rest) for C = 10^-6 .. 10^2."""
    X, Y, N = read_file("features.train")
    # Binary target: 1 where the label is 0, else 0.
    is_zero = (Y == 0).astype(int)

    penalties = []
    weight_norms = []
    for exponent in range(-6, 4, 2):
        penalty = 10 ** exponent
        penalties.append(penalty)
        model = svm.SVC(C=penalty, kernel='linear', shrinking=False)
        model.fit(X, is_zero)
        norm_w = np.linalg.norm(model.coef_.flatten(), ord=2)
        weight_norms.append(norm_w)
        print("C = ", penalty, '    norm(w) =', norm_w)

    plt.semilogx(penalties, weight_norms)
    plt.savefig("h5_q15.png", dpi=300)
项目:MultimodalAutoencoder    作者:natashamjaques    | 项目源码 | 文件源码
def train_and_predict(self, param_dict, predict_on='val'):
        """Train an SVC with the given hyper-parameters and predict labels.

        Args:
            param_dict: dict with 'C', 'kernel' and 'beta' keys; 'beta' is
                passed to sklearn as the gamma parameter.
            predict_on: 'test' evaluates on the held-out test set; anything
                else evaluates on the validation set.

        Returns: The predicted Y labels for the chosen evaluation set.
        """
        loader = self.data_loader
        evaluation_X = loader.test_X if predict_on == 'test' else loader.val_X

        self.model = SVC(C=param_dict['C'], kernel=param_dict['kernel'], gamma=param_dict['beta'])
        self.model.fit(loader.train_X, loader.train_Y)
        return self.predict_on_data(evaluation_X)
项目:brainiak    作者:brainiak    | 项目源码 | 文件源码
def example_of_aggregating_sim_matrix(raw_data, labels, num_subjects, num_epochs_per_subj):
    """Demo: classify with an aggregated (precomputed) kernel matrix to save memory."""
    kernel_svm = svm.SVC(kernel='precomputed', shrinking=False, C=1)
    clf = Classifier(kernel_svm, num_processed_voxels=1000, epochs_per_subj=num_epochs_per_subj)

    # Rotate the first subject's epochs to the end so it becomes the test fold.
    rearranged_data = raw_data[num_epochs_per_subj:] + raw_data[0:num_epochs_per_subj]
    rearranged_labels = labels[num_epochs_per_subj:] + labels[0:num_epochs_per_subj]

    clf.fit(list(zip(rearranged_data, rearranged_data)), rearranged_labels,
            num_training_samples=num_epochs_per_subj*(num_subjects-1))
    predicted = clf.predict()
    print(predicted)
    print(clf.decision_function())

    test_labels = labels[0:num_epochs_per_subj]
    num_wrong = hamming(predicted, np.asanyarray(test_labels)) * num_epochs_per_subj
    logger.info(
        'when aggregating the similarity matrix to save memory, '
        'the accuracy is %d / %d = %.2f' %
        (num_epochs_per_subj-num_wrong, num_epochs_per_subj,
         (num_epochs_per_subj-num_wrong) * 1.0 / num_epochs_per_subj)
    )
    # The test portion of the kernel matrix was computed during fit, so no X here.
    print(clf.score(None, test_labels))
项目:brainiak    作者:brainiak    | 项目源码 | 文件源码
def example_of_cross_validation_using_model_selection(raw_data, labels, num_subjects, num_epochs_per_subj):
    """Leave-one-subject-out cross validation via sklearn.model_selection.

    NOTE: this method does not work for sklearn.svm.SVC with precomputed kernel
    when the kernel matrix is computed in portions; also, it only works for
    self-correlation, i.e. correlation between the same data matrix.
    """
    # No shrinking, C fixed at 1.
    kernel_svm = svm.SVC(kernel='precomputed', shrinking=False, C=1)
    clf = Classifier(kernel_svm, epochs_per_subj=num_epochs_per_subj)

    # One fold per subject, no shuffling, so each fold holds out a whole subject.
    folds = model_selection.StratifiedKFold(n_splits=num_subjects,
                                            shuffle=False)
    scores = model_selection.cross_val_score(clf,
                                             list(zip(raw_data, raw_data)),
                                             y=labels,
                                             cv=folds)
    print(scores)
    logger.info(
        'the overall cross validation accuracy is %.2f' %
        np.mean(scores)
    )
项目:brainiak    作者:brainiak    | 项目源码 | 文件源码
def example_of_correlating_two_components(raw_data, raw_data2, labels, num_subjects, num_epochs_per_subj):
    """Demo: classify correlations between two components, testing on the last subject."""
    kernel_svm = svm.SVC(kernel='precomputed', shrinking=False, C=1)
    clf = Classifier(kernel_svm, epochs_per_subj=num_epochs_per_subj)
    num_training_samples = num_epochs_per_subj*(num_subjects-1)

    # Train on pairs drawn from the two data matrices.
    train_pairs = list(zip(raw_data[0:num_training_samples], raw_data2[0:num_training_samples]))
    clf.fit(train_pairs, labels[0:num_training_samples])

    test_pairs = list(zip(raw_data[num_training_samples:], raw_data2[num_training_samples:]))
    predicted = clf.predict(test_pairs)
    print(predicted)
    print(clf.decision_function(test_pairs))

    test_labels = labels[num_training_samples:]
    num_wrong = hamming(predicted, np.asanyarray(test_labels)) * num_epochs_per_subj
    logger.info(
        'when aggregating the similarity matrix to save memory, '
        'the accuracy is %d / %d = %.2f' %
        (num_epochs_per_subj-num_wrong, num_epochs_per_subj,
         (num_epochs_per_subj-num_wrong) * 1.0 / num_epochs_per_subj)
    )
    print(clf.score(test_pairs, test_labels))
项目:brainiak    作者:brainiak    | 项目源码 | 文件源码
def example_of_correlating_two_components_aggregating_sim_matrix(raw_data, raw_data2, labels,
                                                                 num_subjects, num_epochs_per_subj):
    """Demo: correlate two components while aggregating the kernel matrix to save memory."""
    kernel_svm = svm.SVC(kernel='precomputed', shrinking=False, C=1)
    clf = Classifier(kernel_svm, num_processed_voxels=1000, epochs_per_subj=num_epochs_per_subj)
    num_training_samples = num_epochs_per_subj*(num_subjects-1)

    # fit() receives all pairs; num_training_samples marks the train/test split.
    clf.fit(list(zip(raw_data, raw_data2)), labels,
            num_training_samples=num_training_samples)
    predicted = clf.predict()
    print(predicted)
    print(clf.decision_function())

    test_labels = labels[num_training_samples:]
    num_wrong = hamming(predicted, np.asanyarray(test_labels)) * num_epochs_per_subj
    logger.info(
        'when aggregating the similarity matrix to save memory, '
        'the accuracy is %d / %d = %.2f' %
        (num_epochs_per_subj-num_wrong, num_epochs_per_subj,
         (num_epochs_per_subj-num_wrong) * 1.0 / num_epochs_per_subj)
    )
    # The test portion of the kernel matrix was computed during fit, so no X here.
    print(clf.score(None, test_labels))

# python3 classification.py face_scene bet.nii.gz face_scene/prefrontal_top_mask.nii.gz face_scene/fs_epoch_labels.npy
项目:searchgrid    作者:jnothman    | 项目源码 | 文件源码
def test_build_param_grid_set_estimator():
    """build_param_grid groups estimators without per-estimator params together."""
    svc_linear = SVC()
    logistic = LogisticRegression()
    svc_poly = SVC()
    sgd = SGDClassifier()
    estimator = set_grid(Pipeline([('sel', set_grid(SelectKBest(), k=[2, 3])),
                                   ('clf', None)]),
                         clf=[set_grid(svc_linear, kernel=['linear']),
                              logistic,
                              set_grid(svc_poly, kernel=['poly'], degree=[2, 3]),
                              sgd])
    expected = [{'clf': [svc_linear], 'clf__kernel': ['linear'], 'sel__k': [2, 3]},
                {'clf': [svc_poly], 'clf__kernel': ['poly'],
                 'clf__degree': [2, 3], 'sel__k': [2, 3]},
                {'clf': [logistic, sgd], 'sel__k': [2, 3]}]
    assert build_param_grid(estimator) == expected
项目:searchgrid    作者:jnothman    | 项目源码 | 文件源码
def test_make_grid_search():
    """make_grid_search builds a fittable search for one or several estimators."""
    X, y = load_iris(return_X_y=True)
    lr = LogisticRegression()
    svc = set_grid(SVC(), kernel=['poly'], degree=[2, 3])

    searches = [(make_grid_search(lr, cv=5), 1),   # empty grid -> one candidate
                (make_grid_search(svc, cv=5), 2),
                (make_grid_search([lr, svc], cv=5), 3)]
    for gs, n_results in searches:
        gs.fit(X, y)
        assert gs.cv == 5
        assert len(gs.cv_results_['params']) == n_results

    # In the combined search, the SVC rows carry the degree grid.
    gs3 = searches[2][0]
    svc_mask = gs3.cv_results_['param_root'] == svc
    assert svc_mask.sum() == 2
    assert gs3.cv_results_['param_root__degree'][svc_mask].tolist() == [2, 3]
    assert gs3.cv_results_['param_root'][~svc_mask].tolist() == [lr]
项目:entity-linker    作者:seucs    | 项目源码 | 文件源码
def train(train_dataTables, human_marks):
    """Fit the module-level SVM on (prior, semantic-relatedness) features.

    Each candidate entity of every mention becomes one sample; the target
    is 1 when the candidate's id matches the human-annotated id.
    (Python 2 code: uses xrange.)
    """
    global classifier
    samples = []
    target = []

    for table_idx, dataTable in enumerate(train_dataTables):
        for row in xrange(dataTable.row):
            for col in xrange(dataTable.col):
                mention = dataTable[row][col]
                # Skip cells without a candidate cluster.
                if mention.cid == -1:
                    continue
                eids = dataTable.get_eids(row, col)
                words = dataTable.get_words(row, col)
                entites = dataTable.get_entities(row, col)
                gold_id = human_marks[table_idx][row][col]['id']
                for cand_idx, entity in enumerate(mention.candidates):
                    # Features: popularity prior + relatedness to table entities.
                    samples.append([entity.popular, mention.getSR(cand_idx, entites)])
                    target.append(int(gold_id == entity.id))

    from sklearn import svm
    classifier = svm.SVC(probability=True)
    classifier.fit(samples, target)
项目:johnson-county-ddj-public    作者:dssg    | 项目源码 | 文件源码
def run(self):
        """Train the configured model, score the test set, and package results."""
        training_x, training_y, training_ids = self.get_training_data()
        test_x, test_y, test_ids = self.get_test_data()

        clf = self.define_model(self.model_name, self.model_params)
        clf.fit(training_x, training_y)
        res_predict = clf.predict(test_x)

        # Margin-based models have no predict_proba; use the decision
        # function for them instead.
        uses_margin = (
            self.model_name == "linear.SVC"
            or (self.model_name == "SGDClassifier"
                and (clf.loss == "hinge" or clf.loss == "perceptron")))
        if uses_margin:
            res = list(clf.decision_function(test_x))
        else:
            res = list(clf.predict_proba(test_x)[:, 1])

        result_dictionary = {
            'training_ids': training_ids,
            'predictions_test_y': list(res_predict),
            'prob_prediction_test_y': res,
            'test_y': list(test_y),
            'test_ids': list(test_ids),
            'model_name': self.model_name,
            'model_params': self.model_params,
            'label': self.label,
            'feature_columns_used': self.cols_to_use,
            'config': self.config,
            'feature_importance': self.get_feature_importance(clf, self.model_name),
            'columned_used_for_feat_importance': list(training_x.columns.values)}
        return result_dictionary, clf
项目:MixtureOfExperts    作者:krishnakalyan3    | 项目源码 | 文件源码
def svc_model(self, X, y, x_test, y_test, x_val, y_val, i, j):
        """Fit an RBF SVC and log train/val/test error percentages."""
        X, y = shuffle(X, y, random_state=self.SEED)
        model = SVC(C=self.C, kernel='rbf', gamma=self.gamma, cache_size=self.cache_size,
                    verbose=0, random_state=self.SEED).fit(X, y)

        # Error = (1 - accuracy) expressed as a percentage.
        train_error = (1 - accuracy_score(y, model.predict(X))) * 100
        val_error = (1 - accuracy_score(y_val, model.predict(x_val))) * 100
        test_error = (1 - accuracy_score(y_test, model.predict(x_test))) * 100

        self.warn_log.append([i, train_error, val_error, test_error])
        return model
项目:AirTicketPredicting    作者:junlulocky    | 项目源码 | 文件源码
def __init__(self, isTrain, isOutlierRemoval=0):
        """Set up the SVM classifiers used by this experiment.

        Note that ``LinearSVC()`` and ``SVC(kernel='linear')`` yield slightly
        different decision boundaries:
        - ``LinearSVC`` minimizes the squared hinge loss while ``SVC``
          minimizes the regular hinge loss.
        - ``LinearSVC`` uses One-vs-All multiclass reduction while ``SVC``
          uses One-vs-One.
        :return:
        """
        super(ClassificationSVM, self).__init__(isTrain, isOutlierRemoval)

        # data preprocessing
        self.dataPreprocessing()

        # Default general-purpose SVM classifier.
        self.clf = svm.SVC()

        reg = 1.0  # SVM regularization parameter
        self.svc = svm.SVC(kernel='linear', C=reg, max_iter=100000)
        self.rbf_svc = svm.SVC(kernel='rbf', gamma=0.7, C=reg)
        self.poly_svc = svm.SVC(kernel='poly', coef0=1, degree=3, C=reg)
        self.lin_svc = svm.LinearSVC(C=reg)
项目:AirTicketPredicting    作者:junlulocky    | 项目源码 | 文件源码
def parameterChoosing(self):
        """Grid-search SVM hyper-parameters with 5-fold cross-validation,
        printing per-setting scores and a held-out classification report.
        """
        # Candidate grids: one per kernel family (rbf / poly / linear).
        tuned_parameters = [{'kernel': ['rbf'],
                             'gamma': np.logspace(-4, 3, 30),
                             'C': [1e-3, 1e-2, 1e-1, 1, 10, 100, 1000]},
                             {'kernel': ['poly'],
                              'degree': [1, 2, 3, 4],
                              'C': [1e-3, 1e-2, 1e-1, 1, 10, 100, 1000],
                              'coef0': np.logspace(-4, 3, 30)},
                            {'kernel': ['linear'],
                             'C': [1e-3, 1e-2, 1e-1, 1, 10, 100, 1000]}]

        # Model selection scored by weighted precision.
        clf = GridSearchCV(svm.SVC(C=1), tuned_parameters, cv=5, scoring='precision_weighted')
        clf.fit(self.X_train, self.y_train.ravel())

        # NOTE(review): Python 2 print statements below, and clf.grid_scores_
        # was removed in scikit-learn 0.20 (replaced by cv_results_) --
        # this code targets an older Python 2 / sklearn stack.
        print "Best parameters set found on development set:\n"
        print clf.best_params_

        print "Grid scores on development set:\n"
        for params, mean_score, scores in clf.grid_scores_:
            print "%0.3f (+/-%0.03f) for %r\n" % (mean_score, scores.std() * 2, params)

        print "Detailed classification report:\n"
        y_true, y_pred = self.y_test, clf.predict(self.X_test)
        print classification_report(y_true, y_pred)
项目:LINE    作者:VahidooX    | 项目源码 | 文件源码
def svm_classify(X, label, split_ratios, C):
    """Train a linear SVM with penalty C and report accuracy on each split.

    The data is split sequentially into train/validation/test according to
    split_ratios; returns [train_acc, valid_acc, test_acc].
    """
    n_train = int(len(X)*split_ratios[0])
    n_val = int(len(X)*split_ratios[1])

    splits = [(X[0:n_train], label[0:n_train]),
              (X[n_train:n_train + n_val], label[n_train:n_train + n_val]),
              (X[n_train + n_val:], label[n_train + n_val:])]

    print('training SVM...')
    clf = svm.SVC(C=C, kernel='linear')
    clf.fit(splits[0][0], splits[0][1].ravel())

    return [accuracy_score(split_y, clf.predict(split_x))
            for split_x, split_y in splits]
项目:CAAPR    作者:Stargrazer82301    | 项目源码 | 文件源码
def setup(self):

        """Prepare the classifier: build the SVC and resolve the directory paths."""

        # Base-class setup first.
        super(Classifier, self).setup()

        # Support vector classification with the settings used throughout.
        self.vector_classifier = svm.SVC(gamma=0.001, C=100.)

        # Collection directory for the current mode.
        collection_mode_path = os.path.join(self.collection_user_path, self.config.mode)

        # 'yes' / 'no' saturation collection directories.
        self.yes_path = os.path.join(collection_mode_path, "yes")
        self.no_path = os.path.join(collection_mode_path, "no")

        # Classification directory for the current mode.
        self.classification_mode_path = os.path.join(self.classification_user_path, self.config.mode)

    # -----------------------------------------------------------------
项目:CAAPR    作者:Stargrazer82301    | 项目源码 | 文件源码
def setup(self):

        """Prepare the classifier: build the SVC and resolve the directory paths."""

        # Base-class setup first.
        super(Classifier, self).setup()

        # Support vector classification with the settings used throughout.
        self.vector_classifier = svm.SVC(gamma=0.001, C=100.)

        # Collection directory for the current mode.
        collection_mode_path = os.path.join(self.collection_user_path, self.config.mode)

        # 'yes' / 'no' saturation collection directories.
        self.yes_path = os.path.join(collection_mode_path, "yes")
        self.no_path = os.path.join(collection_mode_path, "no")

        # Classification directory for the current mode.
        self.classification_mode_path = os.path.join(self.classification_user_path, self.config.mode)

    # -----------------------------------------------------------------
项目:dask-searchcv    作者:dask    | 项目源码 | 文件源码
def test_visualize():
    """A fitted search exposes a dask graph and can render it; an unfitted one cannot."""
    pytest.importorskip('graphviz')

    X, y = make_classification(n_samples=100, n_classes=2, flip_y=.2,
                               random_state=0)
    estimator = SVC(random_state=0)
    param_grid = {'C': [.1, .5, .9]}
    search = dcv.GridSearchCV(estimator, param_grid).fit(X, y)

    assert hasattr(search, 'dask_graph_')

    with tmpdir() as d:
        target = os.path.join(d, 'mydask')
        search.visualize(filename=target)
        assert os.path.exists(target + '.png')

    # Visualizing before fitting must raise.
    unfitted = dcv.GridSearchCV(estimator, param_grid)
    with pytest.raises(NotFittedError):
        unfitted.visualize()
项目:stock_trend_prediction    作者:r12543    | 项目源码 | 文件源码
def performSVMClass(X_train, y_train, X_test, y_test, fout, savemodel):
    """
    SVM binary Classification
    """
    # fout / savemodel are kept for interface compatibility; model
    # pickling is currently disabled.
    model = SVC()
    model.fit(X_train, y_train)
    return model.score(X_test, y_test)
项目:stock_trend_prediction    作者:r12543    | 项目源码 | 文件源码
def performSVMClass(X_train, y_train, X_test, y_test):
    """Train a default SVC and return its test accuracy as a percentage.

    :return: accuracy on (X_test, y_test), scaled to 0-100.
    """
    classifier = svm.SVC()
    classifier.fit(X_train, y_train)
    results = classifier.predict(X_test)

    num_correct = (results == y_test).sum()
    # BUG FIX: under Python 2 (which parts of this file still target),
    # num_correct / len(y_test) is integer division and truncates the
    # accuracy to 0; force float division so the result is correct on
    # both Python versions.
    recall = float(num_correct) / len(y_test)

    return recall*100
项目:stock_trend_prediction    作者:r12543    | 项目源码 | 文件源码
def performSVMClass(X_train, y_train, X_test, y_test, fout, savemodel):
    """
    SVM binary Classification
    """
    # fout / savemodel are unused while pickling is disabled below.
    # c = parameters[0]
    # g =  parameters[1]
    clf = SVC()
    clf.fit(X_train, y_train)

    # if savemodel == True:
    #   fname_out = '{}-{}.pickle'.format(fout, datetime.now())
    #   with open(fname_out, 'wb') as f:
    #       cPickle.dump(clf, f, -1)

    accuracy = clf.score(X_test, y_test)

    # NOTE(review): Python 2 print statement; no value is returned.
    print "SVM: ", accuracy
项目:US-TransportationMode    作者:vlomonaco    | 项目源码 | 文件源码
def support_vector_machine(self, sensors_set):
        """Train and evaluate an SVC on one sensor set's features, writing
        the resulting accuracy to the results directory."""
        features = list(self.dataset.get_sensors_set_features(sensors_set))
        print("SUPPORT VECTOR MACHINE.....")
        print("CLASSIFICATION BASED ON THESE SENSORS: ", self.dataset.get_remained_sensors(sensors_set))
        print("NUMBER OF FEATURES: ", len(features))

        train_features, train_classes, test_features, test_classes = self.__get_sets_for_classification(
            self.dataset.get_train, self.dataset.get_test, features)
        train_scaled, test_scaled = util.scale_features(train_features, test_features)

        # Hyper-parameters are looked up per sensor set from the constants module.
        svm_model = SVC(C=const.PAR_SVM_C[sensors_set], gamma=const.PAR_SVM_GAMMA[sensors_set], verbose=False)
        svm_model.fit(train_scaled, train_classes)
        acc = accuracy_score(test_classes, svm_model.predict(test_scaled))
        print("ACCURACY : " + str(acc))
        print("END SUPPORT VECTOR MACHINE.....")

        if not os.path.exists(const.DIR_RESULTS):
            os.makedirs(const.DIR_RESULTS)
        with open(const.DIR_RESULTS + "/" + str(sensors_set) + const.FILE_SUPPORT_VECTOR_MACHINE_RESULTS, 'w') as f:
            f.write("acc\n" + str(acc))

    # use different algorithms changing target classes, try all combination of two target classes
项目:coremltools    作者:apple    | 项目源码 | 文件源码
def test_support_vector_classifier(self):
        """Check that a CoreML-converted RBF SVC agrees with scikit-learn
        for every dtype listed in self.number_data_type."""
        for dtype in self.number_data_type.keys():
            scikit_model = SVC(kernel='rbf', gamma=1.2, C=1)
            data = self.scikit_data['data'].astype(dtype)
            # Binarize the target: True where above the mean.
            target = self.scikit_data['target'].astype(dtype) > self.scikit_data['target'].astype(dtype).mean()
            scikit_model, spec = self._sklearn_setup(scikit_model, dtype, data, target)
            coreml_model = create_model(spec)
            # Compare predictions sample-by-sample on the first 10 rows.
            for idx in range(0, 10):
                test_data = data[idx].reshape(1, -1)
                try:
                    self.assertEqual(scikit_model.predict(test_data)[0],
                                     bool(int(coreml_model.predict({'data': test_data})['target'])),
                                     msg="{} != {} for Dtype: {}".format(
                                         scikit_model.predict(test_data)[0],
                                         bool(int(coreml_model.predict({'data': test_data})['target'])),
                                         dtype
                                     )
                                     )
                except RuntimeError:
                    # Some dtypes are not supported by the CoreML runtime.
                    print("{} not supported. ".format(dtype))
项目:data_programming    作者:kep1616    | 项目源码 | 文件源码
def learn(training_data, training_labels, show_score=False, store=False):
    """Fit a linear SVC (with probability estimates) on the training set.

    Args:
        training_data / training_labels: the supervised training set.
        show_score: when True, print the training-set accuracy.
        store: when True, pickle the classifier to the configured path.
    """
    print ("Start Learning....")

    clf = SVC(kernel='linear', probability=True, C=1)
    clf.fit(training_data, training_labels)

    print ("Done Learning.")

    if store:
        print ("Pickling classifier...")
        # BUG FIX: the file handle passed to pickle.dump was never closed;
        # use a context manager so the file is flushed and released.
        with open(path_config.CLASSIFIER_PICKLING_FILE, 'wb') as handle:
            pickle.dump(clf, handle)
        print ("Done Pickling.")

    if show_score:
        print ("Scoring classifier ...")
        print ("Data-Level Training Set Prediction Accuracy: %s" % clf.score(training_data, training_labels))
项目:TextStageProcessor    作者:mhyhre    | 项目源码 | 文件源码
def classification_linear_svm(self):
        """Run linear-kernel SVM classification over the hashed documents and
        emit per-document predictions with probabilities."""
        self.signals.PrintInfo.emit("???????? Linear SVM")
        output_dir = self.output_dir + 'linear_svm_out/'
        if not os.path.exists(output_dir):
            os.makedirs(output_dir)

        # Vectorize all documents, then split into train/test at self.split.
        vectorizer = HashingVectorizer()
        vectors = vectorizer.fit_transform(self.fdata)
        train_part = vectors[:self.split]
        test_part = vectors[self.split:]

        model = SVC(kernel="linear", probability=True, C=self.linear_svm_c)
        model.fit(train_part, self.trainingClass)
        predictions = model.predict(test_part)
        probabilities = model.predict_proba(test_part)

        self.write_results_to_file(output_dir + 'results.csv', predictions, probabilities, model.classes_, self.test_filenames)
        report = self.compile_result_string(predictions, probabilities, model.classes_, self.test_filenames)
        self.signals.PrintInfo.emit(report)
项目:TextStageProcessor    作者:mhyhre    | 项目源码 | 文件源码
def classification_rbf_svm(self):
        """Run RBF-kernel SVM classification over the hashed documents and
        emit per-document predictions with probabilities."""
        self.signals.PrintInfo.emit("RBF SVM")
        output_dir = self.output_dir + 'rbf_svm_out/'
        if not os.path.exists(output_dir):
            os.makedirs(output_dir)

        # Vectorize all documents, then split into train/test at self.split.
        vectorizer = HashingVectorizer()
        vectors = vectorizer.fit_transform(self.fdata)
        train_part = vectors[:self.split]
        test_part = vectors[self.split:]

        model = SVC(gamma=2, probability=True, C=self.rbf_svm_c)
        model.fit(train_part, self.trainingClass)
        predictions = model.predict(test_part)
        probabilities = model.predict_proba(test_part)

        self.write_results_to_file(output_dir + 'results.csv', predictions, probabilities, model.classes_,self.test_filenames)
        report = self.compile_result_string(predictions, probabilities, model.classes_, self.test_filenames)
        self.signals.PrintInfo.emit(report)
项目:OpinionSpam    作者:Coder-Yu    | 项目源码 | 文件源码
def fitAndPredict(self):
        """Fit an SVC on the training set and predict one label per test user
        from that user's BDS score."""
        self.classifier = SVC()
        self.classifier.fit(self.trainingSet, self.trainingLabel)

        # One single-feature sample per user: [[BDS]].
        pred_labels = {user: self.classifier.predict([[self.BDS[user]]])
                       for user in self.testDict}

        return pred_labels
项目:OpinionSpam    作者:Coder-Yu    | 项目源码 | 文件源码
def fitAndPredict(self):
        """Build a dense bag-of-words matrix over train+test documents, then
        print LogisticRegression and SVM classification reports."""
        corpus = self.trainingSet+self.testSet
        dictionary = corpora.Dictionary(corpus)

        # Dense document-term matrix; rows are documents.
        corpus = [dictionary.doc2bow(text) for text in corpus]
        text_matrix = gensim.matutils.corpus2dense(corpus, num_terms=len(dictionary.token2id)).T

        # Optional dimensionality reduction, controlled by module-level flags.
        if PCA_Applied:
            pca = PCA(n_components=PCA_nComponents)
            text_matrix = pca.fit_transform(text_matrix)

        # NOTE(review): Python 2 print statements below.
        classifier = LogisticRegression()
        classifier.fit(text_matrix[0:len(self.trainingSet)], self.trainingLabel)
        pred_labels = classifier.predict(text_matrix[len(self.trainingSet):])
        print 'Logistic:'
        print classification_report(self.testLabel, pred_labels)

        classifier = SVC()
        classifier.fit(text_matrix[0:len(self.trainingSet)], self.trainingLabel)
        pred_labels = classifier.predict(text_matrix[len(self.trainingSet):])
        print 'SVM:'
        print classification_report(self.testLabel, pred_labels)
项目:OpinionSpam    作者:Coder-Yu    | 项目源码 | 文件源码
def fitAndPredict(self):
    """Train and evaluate Logistic Regression and SVM classifiers on a
    TF-IDF weighted bag-of-words representation of the corpora.

    Side effects: prints a classification report for each model.
    """
    # Build one dictionary over both splits so train/test share a vocabulary.
    corpus = self.trainingSet + self.testSet
    dictionary = corpora.Dictionary(corpus)
    corpus = [dictionary.doc2bow(text) for text in corpus]
    # Re-weight raw counts with TF-IDF before densifying.
    model = models.TfidfModel(corpus)
    corpus = [text for text in model[corpus]]
    # corpus2dense yields (num_terms, num_docs); transpose to docs-by-terms.
    text_matrix = gensim.matutils.corpus2dense(corpus, num_terms=len(dictionary.token2id)).T

    if PCA_Applied:
        pca = PCA(n_components=PCA_nComponents)
        text_matrix = pca.fit_transform(text_matrix)

    classifier = LogisticRegression()
    classifier.fit(text_matrix[0:len(self.trainingSet)], self.trainingLabel)
    pred_labels = classifier.predict(text_matrix[len(self.trainingSet):])
    # Parenthesized print is valid in both Python 2 and Python 3.
    print('Logistic:')
    print(classification_report(self.testLabel, pred_labels))

    classifier = SVC()
    classifier.fit(text_matrix[0:len(self.trainingSet)], self.trainingLabel)
    pred_labels = classifier.predict(text_matrix[len(self.trainingSet):])
    print('SVM:')
    print(classification_report(self.testLabel, pred_labels))
项目:OpinionSpam    作者:Coder-Yu    | 项目源码 | 文件源码
def fitAndPredict(self):
    """Fit an SVM on the training split and predict a label for every
    user in the test dictionary from its (MUD, RUD, QUD) feature triple.

    Returns a dict mapping each test user to the classifier's prediction.
    """
    model = SVC()
    model.fit(self.trainingSet, self.trainingLabel)

    # One prediction per test user, keyed by user id.
    return {
        u: model.predict([[self.MUD[u], self.RUD[u], self.QUD[u]]])
        for u in self.testDict
    }
项目:stock-price-prediction    作者:chinuy    | 项目源码 | 文件源码
def buildModel(dataset, method, parameters):
    """
    Build final model for predicting real testing data.

    Arguments
    dataset: DataFrame whose 'UpDown' column is the target; all other
             columns (everything but the last) are used as features.
    method: one of 'RNN', 'RF', 'KNN', 'SVM', 'ADA'.
    parameters: extra hyper-parameters; for 'SVM' this is (C, gamma).

    Returns the fitted classifier.
    Raises ValueError for an unknown method (previously this fell through
    and crashed with a NameError on the undefined `clf`).
    """
    features = dataset.columns[0:-1]

    if method == 'RNN':
        # The RNN helper trains internally; return it directly.
        clf = performRNNlass(dataset[features], dataset['UpDown'])
        return clf

    elif method == 'RF':
        clf = RandomForestClassifier(n_estimators=1000, n_jobs=-1)

    elif method == 'KNN':
        clf = neighbors.KNeighborsClassifier()

    elif method == 'SVM':
        c = parameters[0]
        g = parameters[1]
        clf = SVC(C=c, gamma=g)

    elif method == 'ADA':
        clf = AdaBoostClassifier()

    else:
        raise ValueError('Unsupported method: %r' % (method,))

    return clf.fit(dataset[features], dataset['UpDown'])
项目:Graduation-design    作者:Baichenjia    | 项目源码 | 文件源码
def Training_model():
    """Train an SVM emotion classifier from pre-computed term counts.

    Reads a term-count matrix (first line is a header and is skipped) and
    its class labels from disk, applies a TF-IDF transform, and fits an
    SVC on the weighted features.

    Returns the fitted classifier.
    """
    # `with` guarantees the files are closed even if loadtxt raises
    # (the original closed them manually and leaked on error).
    with open("f://emotion/mysite/weibo_emotion/emotion_file/data_count.txt") as f:
        f.readline()  # skip the header line
        data = np.loadtxt(f)
    # One class label per sample.
    with open("f://emotion/mysite/weibo_emotion/emotion_file/data_jixing.txt") as f1:
        leibie = np.loadtxt(f1)

    # TF-IDF weighting of the raw counts.
    transformer = TfidfTransformer()
    tfidf = transformer.fit_transform(data)
    data1 = tfidf.toarray()

    # Fit the SVM classifier on the weighted features.
    clf = svm.SVC()
    clf.fit(data1, leibie)    # training the svc model
    return clf
项目:LogoDetectionInVideo    作者:nmemme    | 项目源码 | 文件源码
def train():
    """Train an SVM logo classifier from a directory tree of images.

    Each subdirectory of the logo folder is one class; every image in it
    is resized to 250x250, converted to grayscale, reduced with PCA, and
    flattened into a feature vector labelled with the directory name.

    Returns the fitted sklearn SVC.

    Bug fixes vs. the original:
    - `svm = svm.SVC()` made `svm` a local name, so the call raised
      UnboundLocalError before the model was ever built; the classifier is
      now bound to a distinct name.
    - fitting and returning happened *inside* the per-class loop, so only
      the first class directory was ever used; training now runs after all
      classes have been collected.
    """
    training_set = []
    training_labels = []
    # NOTE(review): hard-coded absolute path — consider making it a parameter.
    os.chdir("/Users/muyunyan/Desktop/EC500FINAL/logo/")
    for label in os.listdir("."):
        os.chdir(label)
        print(label)
        for fname in os.listdir("."):
            img = cv2.imread(fname)
            res = cv2.resize(img, (250, 250))
            gray_image = cv2.cvtColor(res, cv2.COLOR_BGR2GRAY)
            xarr = np.squeeze(np.array(gray_image).astype(np.float32))
            m, v = cv2.PCACompute(xarr)
            # Flatten the PCA eigenvectors into one feature vector.
            flat_arr = np.array(v).ravel()
            training_set.append(flat_arr)
            training_labels.append(label)
        os.chdir("..")

    classifier = svm.SVC()
    classifier.fit(training_set, training_labels)
    return classifier
项目:Steal-ML    作者:ftramer    | 项目源码 | 文件源码
def CAL_v(name, label_p, label_n, oracle, n_features, ftype, test_x, test_y):
    """Active-learning model-extraction loop against a black-box oracle.

    Repeatedly fits an RBF-SVM surrogate (grid-searched over C and gamma)
    on the points queried so far, asks the current surrogate for new
    informative points, labels them with the oracle, and prints progress
    until the total query budget (3500) is exhausted.

    Note: Python 2 code (multi-argument `print` statement at the end).
    """
    online = OnlineBase(name, label_p, label_n, oracle, n_features, ftype, error=.5)
    # Seed the training set with 100 initial oracle queries.
    x, y = online.collect_pts(100, -1)
    i = 0
    q = online.get_n_query()
    # Hyper-parameter grid for the RBF-SVM surrogate.
    C_range = np.logspace(-2, 5, 10, base=10)
    gamma_range = np.logspace(-5, 1, 10, base=10)
    param_grid = dict(gamma=gamma_range, C=C_range)
    while q < 3500:  # stop once the query budget is spent
        i += 1
        # h_ = ex.fit(x, y)

        cv = StratifiedShuffleSplit(y, n_iter=5, test_size=0.2, random_state=42)
        grid = GridSearchCV(svm.SVC(), param_grid=param_grid, cv=cv, verbose=0, n_jobs=-1)
        grid.fit(x, y)
        h_ = grid.best_estimator_  # best surrogate found this round

        # Use the surrogate's own predictions to collect candidate points,
        # then get their true labels from the oracle.
        online_ = OnlineBase('', label_p, label_n, h_.predict, n_features, ftype, error=.1)
        x_, _ = online_.collect_pts(10, 200)
        if x_ is not None and len(x_) > 0:
            x.extend(x_)
            y.extend(oracle(x_))
        q += online_.get_n_query()
        pred_y = h_.predict(test_x)
        # Progress: sample count, queries used so far, test accuracy.
        print len(x), q, sm.accuracy_score(test_y, pred_y)
项目:Steal-ML    作者:ftramer    | 项目源码 | 文件源码
def grid_retrain_in_x(self):
    """Refit an RBF-SVM on the extended set (X_ex, y_ex), grid-searching
    gamma, install it via set_clf2, and benchmark the result.

    Returns (1, 1) when only one class is present in y_ex; otherwise the
    value of self.benchmark().
    """
    param_grid = {'gamma': np.logspace(-15, 3, 19, base=2)}

    # Degenerate label set: nothing meaningful to fit.
    if len(np.unique(self.y_ex)) < 2:
        return 1, 1

    try:
        splitter = StratifiedShuffleSplit(self.y_ex, n_iter=5, test_size=.2)
        search = GridSearchCV(SVC(C=1e5), param_grid=param_grid, cv=splitter, n_jobs=-1)
        search.fit(self.X_ex, self.y_ex)
        best_clf = search.best_estimator_
    except ValueError:
        # Stratified splitting can fail (e.g. a class with too few
        # samples); fall back to a plain fit without the gamma search.
        best_clf = SVC(C=1e5)
        best_clf.fit(self.X_ex, self.y_ex)

    self.set_clf2(best_clf)
    return self.benchmark()
项目:Steal-ML    作者:ftramer    | 项目源码 | 文件源码
def grid_search(self):
    """Grid-search C for a polynomial-kernel SVM on the extended training
    set, then evaluate the best estimator on train/validation/test.

    Returns a Result row with the three accuracies.
    """
    C_range = np.logspace(-5, 15, 21, base=2)
    param_grid = dict(C=C_range)
    cv = StratifiedShuffleSplit(self.y_ex, n_iter=5, test_size=0.2, random_state=42)
    grid = GridSearchCV(SVC(kernel='poly', max_iter=10000), param_grid=param_grid, cv=cv, n_jobs=1, verbose=0)

    # The original messages said 'Linear'; the kernel here is poly.
    logger.info('start grid search for Poly')
    grid.fit(self.X_ex, self.y_ex)
    logger.info('end grid search for Poly')

    # (Removed an unused `scores` list built from the deprecated
    # grid_scores_ attribute.)

    # final train
    clf = grid.best_estimator_

    pred_train = clf.predict(self.X_ex)
    pred_val = clf.predict(self.val_x)
    pred_test = clf.predict(self.test_x)

    r = Result(self.name + ' (X)', 'Poly', len(self.X_ex),
               sm.accuracy_score(self.y_ex, pred_train),
               sm.accuracy_score(self.val_y, pred_val),
               sm.accuracy_score(self.test_y, pred_test))
    return r
项目:Parkinsons-Vocal-Analysis-Model    作者:WilliamY97    | 项目源码 | 文件源码
def fit_model(X, y):
    """Grid-search an SVC over kernel, degree and C using a custom F1-style
    scorer, fit it on (X, y), and return the fitted GridSearchCV object."""
    param_grid = {
        'kernel': ['poly', 'rbf', 'sigmoid'],
        'degree': [1, 2, 3],
        'C': [0.1, 1, 10],
    }

    # Wrap the project's performance metric as a sklearn scorer.
    scorer = make_scorer(performance_metric, greater_is_better=True)

    search = GridSearchCV(svm.SVC(),
                          param_grid=param_grid,
                          scoring=scorer)
    search.fit(X, y)

    return search


# Read student data
项目:algo-trading-pipeline    作者:NeuralKnot    | 项目源码 | 文件源码
def create_model(self, training_articles):
        """Train and persist the article-classification pipeline.

        For each training article, fetches Google Cloud NL annotations and
        keeps the article only if it mentions a relevant market entity.
        The surviving feature vectors are standard-scaled, a one-vs-rest
        probabilistic SVC is fitted, and both the scaler and the model are
        dumped under data_analysis/.
        """
        model = OneVsRestClassifier(svm.SVC(probability=True))

        features = []
        labels = []
        i = 0
        for article in training_articles:
            print("Generating features for article " + str(i) + "...")
            google_cloud_response = self.analyze_text_google_cloud(article["article"])
            relevant_entities = self.get_relevant_entities(google_cloud_response["entities"], article["market"]["entities"], article["market"]["wikipedia_urls"])

            # Only count this article if a relevant entity is present
            if relevant_entities:
                article_features = self.article_features(relevant_entities, article["market"], google_cloud_response, article["article"])
                features.append(article_features)
                labels.append(article["label"])
            else:
                print("Skipping article " + str(i) + "...")

            i = i + 1

        print("Performing feature scaling...")
        scaler = preprocessing.StandardScaler().fit(features)
        features_scaled = scaler.transform(features)

        print("Fitting model...")
        model.fit(features_scaled, labels)

        print("Saving model...")
        # NOTE(review): "caler.pkl" looks like a typo for "scaler.pkl" —
        # confirm against whatever loads it in prod before renaming.
        joblib.dump(scaler, "data_analysis/caler.pkl")
        joblib.dump(model, "data_analysis/model.pkl")

        print("Done!")

    # For use in prod
项目:ISM2017    作者:ybayle    | 项目源码 | 文件源码
def classify(train=None, test=None, data=None, res_dir="res/", disp=True, outfilename=None):
    """Compare multiple classifiers and return the predictions of the last
    one evaluated.

    Either pass pre-split arrays in `data` (keys: train_features,
    train_groundtruths, test_features, test_groundtruths) or file paths in
    `train`/`test`, which are resolved and parsed with read_file.
    """
    utils.print_success("Comparison of differents classifiers")
    if data is not None:
        train_features = data["train_features"]
        train_groundtruths = data["train_groundtruths"]
        test_features = data["test_features"]
        test_groundtruths = data["test_groundtruths"]
    else:
        train_path = utils.abs_path_file(train)
        test_path = utils.abs_path_file(test)
        train_features, train_groundtruths = read_file(train_path)
        test_features, test_groundtruths = read_file(test_path)
    if not utils.create_dir(res_dir):
        res_dir = utils.abs_path_dir(res_dir)
    # Only RandomForest is enabled; the other candidates stayed commented
    # out in the original comparison.
    classifiers = {"RandomForest": RandomForestClassifier(n_jobs=-1)}
    for model_name in classifiers:
        utils.print_success(model_name)
        model = classifiers[model_name]
        utils.print_info("\tFit")
        model.fit(train_features, train_groundtruths)
        utils.print_info("\tPredict")
        predictions = model.predict(test_features)
    return predictions
项目:code-uai16    作者:thanhan    | 项目源码 | 文件源码
def classify(n = 50):
    """Fit a log-loss SGD classifier (L1 penalty, heavy positive-class
    weighting) on the first n rows of the module-level matrix `mat` and
    labels `rel`, and return the fitted model."""
    # Alternatives tried previously: MultinomialNB, RBF-kernel SVC.
    model = SGDClassifier(loss="log", penalty="l1", class_weight={0.0: 0.022, 1.0: 1.0})
    model.fit(mat[:n], rel[:n])
    return model
项目:Kaggle    作者:lawlite19    | 项目源码 | 文件源码
def baseline_svm():
    """Baseline Titanic pipeline: preprocess data/train.csv, fit an SVC,
    predict on data/test.csv and write baseline_svm_result/prediction.csv.

    Python 2 code. The original Chinese comments arrived mojibake'd
    ("????"); they were replaced with best-effort English notes — verify
    against the upstream repository.
    """
    train_data = pd.read_csv(r"data/train.csv")
    print u"?????\n",train_data.info()
    print u'?????\n',train_data.describe()  
    #display_data(train_data)  # exploratory visualization (disabled)
    #display_with_process(train_data) # visualization after preprocessing (disabled)
    process_data = pre_processData(train_data,'process_train_data')  # clean + engineer features, cached under this name
    train_data = process_data.filter(regex='Survived|Age|SibSp|Parch|Fare|Cabin_.*|Embarked_.*|Sex_.*|Pclass_.*')  # keep label + model features only
    train_np = train_data.as_matrix()  # DataFrame -> ndarray (as_matrix was removed in pandas>=1.0)
    '''??model'''
    # Column 0 is the Survived label; the remaining columns are features.
    X = train_np[:,1:]
    y = train_np[:,0]
    model = svm.SVC(C=1.0,tol=1e-6).fit(X,y)
    # print pd.DataFrame({"columns":list(train_data.columns)[1:],"coef_":list(model.coef_.T)})

    '''??????'''
    # Apply the same preprocessing to the test set, predict, and save.
    test_data = pd.read_csv(r"data/test.csv")
    process_test_data = pre_processData(test_data,'process_test_data')  # same preprocessing pipeline as training
    test_data = process_test_data.filter(regex='Age|SibSp|Parch|Fare|Cabin_.*|Embarked_.*|Sex_.*|Pclass_.*')
    test_np = test_data.as_matrix()
    predict = model.predict(test_np)
    result = pd.DataFrame(data={'PassengerId':process_test_data['PassengerId'].as_matrix(),'Survived':predict.astype(np.int32)})
    result.to_csv(r'baseline_svm_result/prediction.csv',index=False)    


# baseline???????——0.76077
项目:Kaggle    作者:lawlite19    | 项目源码 | 文件源码
def baseline_svm_crossValidate():
    """Hold-out validation of the baseline Titanic SVM (rbf kernel).

    Splits the preprocessed training data 80/20, fits on the 80% part,
    prints accuracy on the 20% part, and writes the misclassified rows
    (paired with their PassengerIds) to error.csv for inspection.

    Python 2 code; the original Chinese comments arrived mojibake'd and
    were replaced with best-effort English notes.
    """
    origin_train_data = pd.read_csv(r"data/train.csv")
    process_data = pre_processData(origin_train_data,'process_train_data')  # clean + engineer features, cached under this name
    process_data_train,process_data_cv = train_test_split(process_data,test_size=0.2)   
    train_data = process_data_train.filter(regex='Survived|Age|SibSp|Parch|Fare|Cabin_.*|Embarked_.*|Sex_.*|Pclass_.*')  # keep label + model features only
    train_np = train_data.as_matrix()  # DataFrame -> ndarray
    '''??model'''
    # Column 0 is the Survived label; the remaining columns are features.
    X_train = train_np[:,1:]
    y_train = train_np[:,0]
    model = svm.SVC(kernel='rbf',tol=1e-6).fit(X_train,y_train)
    #print pd.DataFrame({"columns":list(train_data.columns)[1:],"coef_":list(model.coef_.T)})
    cv_data = process_data_cv.filter(regex='Survived|Age|SibSp|Parch|Fare|Cabin_.*|Embarked_.*|Sex_.*|Pclass_.*')
    cv_np = cv_data.as_matrix()
    X_cv = cv_np[:,1:]
    y_cv = cv_np[:,0]
    predictions = model.predict(X_cv)
    # Hold-out accuracy (Python 2 print statement).
    print np.float32(np.sum(predictions == y_cv))/np.float32(predictions.shape[0])

    # Pull the misclassified rows out of the ORIGINAL (unprocessed) data
    # so they are human-readable, and pair them with the PassengerIds.
    error_items = origin_train_data.loc[origin_train_data['PassengerId'].isin(process_data_cv[predictions != y_cv]['PassengerId'].values)]
    predictions_item = pd.DataFrame(data=process_data_cv[predictions != y_cv]['PassengerId'])
    predictions_item.columns=['error_PassengerId']
    # error_items = error_items.reset_index(drop=True)
    error_result = pd.concat([error_items,predictions_item],axis=1)
    error_result.to_csv(r'error.csv',index=False)


    '''??????'''
    '''test_data = pd.read_csv(r"data/test.csv")
    process_test_data = pre_processData(test_data,'process_test_data',optimize=False)  # ?????
    test_data = process_test_data.filter(regex='Age|SibSp|Parch|Fare|Cabin_.*|Embarked_.*|Sex_.*|Pclass_.*')
    test_np = test_data.as_matrix()
    predict = model.predict(test_np)
    result = pd.DataFrame(data={'PassengerId':process_test_data['PassengerId'].as_matrix(),'Survived':predict.astype(np.int32)})
    result.to_csv(r'svm_result/prediction.csv',index=False)'''



# baseline crossValidate???????——??????
项目:deeppavlov    作者:deepmipt    | 项目源码 | 文件源码
def svc_model(self):
    """Return the probabilistic linear-kernel SVC (C=0.3) used by this
    component."""
    return svm.SVC(probability=True, C=0.3, kernel='linear')
项目:EmotiW-2017-Audio-video-Emotion-Recognition    作者:xujinchang    | 项目源码 | 文件源码
def use_SVM(X_data,y_data):
    """Fit an RBF-kernel SVC (gamma=0.1, C=10, probability estimates on)
    on the given data, persist it under ./sklearn_model/, and return it."""
    p_gamma = 0.1
    p_C = 10
    model = SVC(kernel='rbf', random_state=0, gamma=p_gamma, C=p_C, probability=True)
    model.fit(X_data, y_data)
    # Filename encodes the gamma and C hyper-parameters used.
    joblib.dump(model, "./sklearn_model/svm_trainval1_{param1}_{param2}".format(param1=p_gamma, param2=p_C))
    return model
项目:Wall-EEG    作者:neurotechuoft    | 项目源码 | 文件源码
def classifier_train(feature_matrix_0, feature_matrix_1, algorithm = 'SVM'):
    """
    Trains a binary classifier using the SVM algorithm with the following parameters

    Arguments
    feature_matrix_0: Matrix with examples for Class 0
    feature_matrix_1: Matrix with examples for Class 1
    algorithm: Currently only SVM is supported

    Outputs
    classifier: trained classifier (scikit object)
    mu_ft, std_ft: normalization parameters for the data
    """
    # Create vector Y (class labels): 0 for the first matrix, 1 for the second
    class0 = np.zeros((feature_matrix_0.shape[0],1))
    class1 = np.ones((feature_matrix_1.shape[0],1))

    # Concatenate feature matrices and their respective labels
    y = np.concatenate((class0, class1),axis=0)
    features_all = np.concatenate((feature_matrix_0, feature_matrix_1),axis=0)

    # Normalize inputs with a single global mean/std (scalar over the
    # whole matrix, not per-feature — preserved from the original)
    mu_ft = np.mean(features_all)
    std_ft = np.std(features_all)
    X = (features_all - mu_ft) / std_ft

    # Train SVM, using default parameters. ravel() flattens y to the 1-D
    # (n_samples,) shape sklearn expects; the original passed an (n, 1)
    # column vector, which triggers a DataConversionWarning.
    classifier = svm.SVC()
    classifier.fit(X, y.ravel())

    return classifier, mu_ft, std_ft