The following 13 code examples, extracted from open-source Python projects, illustrate how to use scipy.special.logit().
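Before the project examples, here is a minimal orientation sketch (the array values are made up for illustration): scipy.special.logit(p) computes log(p / (1 - p)) elementwise, and scipy.special.expit is its inverse.

from scipy.special import logit, expit
import numpy as np

# logit maps probabilities in (0, 1) to the real line: logit(p) = log(p / (1 - p)).
p = np.array([0.1, 0.5, 0.9])        # illustrative probabilities
z = logit(p)                         # approx. [-2.197, 0.0, 2.197]

# expit (the logistic sigmoid) is the inverse, so the round trip recovers p.
assert np.allclose(expit(z), p)

# Exact 0 or 1 maps to -inf / +inf, which is why several examples below
# clip, shift, or smooth probabilities before calling logit.
print(logit(np.array([0.0, 1.0])))   # [-inf  inf]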
def run_lasso(X, y, max_iter=3000, cv=5, n_threads=1):
    """ Implement LassoCV in sklearn

    Args:
        X (np.array): scaled X.
        y (pd.df): four columns response table.
        max_iter (int): max iteration.
        cv (int): CV fold.
        n_threads (int): Number of threads to use for parallel computing.

    Returns:
        float: trained alpha value.
    """
    logger.info('Implementing LassoCV with {} iter. and {}-fold CV'.format(max_iter, cv))
    # generate logit response
    y_logit = logit((y.nMut + 0.5) / (y.length * y.N))
    # sub-sampling X and y (300,000)
    use_ix = np.random.choice(y_logit.shape[0], 300000, replace=False)
    Xsub = X[use_ix, :]
    ysub = y_logit[use_ix]
    reg = LassoCV(max_iter=max_iter, cv=cv, copy_X=False, n_jobs=n_threads)
    lassocv = reg.fit(Xsub, ysub)
    logger.info('LassoCV alpha = {}'.format(lassocv.alpha_))
    return lassocv.alpha_
def find_coeffs_bin(self, budget):
    k = len(self.classes)       # number of classes
    assert k == 2
    n = self.num_features()     # vector dimension

    X_train = self.gen_query_set(n, budget)
    y = logit(self.query_probas(X_train)[:, 1])

    X = np.hstack((X_train, np.ones((budget, 1))))

    if budget == n + 1:
        try:
            w_opt = np.linalg.solve(X, y).T
        except np.linalg.LinAlgError:
            w_opt = np.linalg.lstsq(X, y)[0].T
    else:
        w_opt = np.linalg.lstsq(X, y)[0].T

    int_opt = w_opt[-1]
    w_opt = np.array([w_opt[:-1]])
    self.X_train = X_train

    return w_opt, int_opt
def predict(st, norm, bounds):
    rew = np.log(1 + (st[:, -1:]))
    a_x = bounds[0]
    b_x = bounds[2]
    eps = 1e-5
    rew = np.clip(rew, a_x + eps, b_x - eps)
    rew = logit((rew - a_x) / (b_x - a_x))
    st[:, -1:] = rew
    State = np.zeros((1, 61))
    State[0, :] = np.hstack((st[0, 0], st[:, [1, 2, 3, -1]].ravel()))
    X = (State - norm[0]) / norm[1]
    return np.round(policy_network(X)[0, :], 4)
def test_logistic_lmm():
    df = pd.read_csv(os.path.join(get_resource_path(), 'sample_data.csv'))
    model = Lmer('DV_l ~ IV1+ (IV1|Group)', data=df, family='binomial')
    model.fit(summarize=False)

    assert model.coefs.shape == (2, 13)
    estimates = np.array([-0.16098421, 0.00296261])
    assert np.allclose(model.coefs['Estimate'], estimates, atol=.001)

    assert isinstance(model.fixef, pd.core.frame.DataFrame)
    assert model.fixef.shape == (47, 2)

    assert isinstance(model.ranef, pd.core.frame.DataFrame)
    assert model.ranef.shape == (47, 2)

    assert np.allclose(model.coefs.loc[:, 'Estimate'], model.fixef.mean(), atol=.01)

    # Test prediction
    assert np.allclose(model.predict(model.data, use_rfx=True), model.data.fits)
    assert np.allclose(model.predict(model.data, use_rfx=True, pred_type='link'), logit(model.data.fits))
def compute_sgd(data):
    logging.info('Computing SGD')
    n_splits = 10
    folder = StratifiedKFold(n_splits=n_splits, shuffle=True)
    for ix_first, ix_second in tqdm_notebook(
            folder.split(np.zeros(data['y_train'].shape[0]), data['y_train']),
            total=n_splits):
        # {'en__l1_ratio': 0.0001, 'en__alpha': 1e-05}
        model = SGDClassifier(
            loss='log',
            penalty='elasticnet',
            fit_intercept=True,
            n_iter=100,
            shuffle=True,
            n_jobs=-1,
            l1_ratio=0.0001,
            alpha=1e-05,
            class_weight=None)
        model = model.fit(data['X_train'][ix_first, :], data['y_train'][ix_first])
        data['y_train_pred'][ix_second] = logit(model.predict_proba(data['X_train'][ix_second, :])[:, 1])
        data['y_test_pred'].append(logit(model.predict_proba(data['X_test'])[:, 1]))
    data['y_test_pred'] = np.array(data['y_test_pred']).T.mean(axis=1)
    return data
def preprocess_feature(self, feature, parameters):
    is_not_empty = 1 - np.isclose(feature, normalization.MISSING_VALUE)
    if parameters.feature_type == identify_types.BINARY:
        # Binary features are always 1 unless they are 0
        return ((feature != 0) * is_not_empty).astype(np.float32)
    if parameters.boxcox_lambda is not None:
        feature = stats.boxcox(
            np.maximum(
                feature + parameters.boxcox_shift, normalization.BOX_COX_MARGIN
            ),
            parameters.boxcox_lambda
        )
    # No *= to ensure consistent out-of-place operation.
    if parameters.feature_type == identify_types.PROBABILITY:
        feature = np.clip(feature, 0.01, 0.99)
        feature = special.logit(feature)
    elif parameters.feature_type == identify_types.QUANTILE:
        quantiles = parameters.quantiles
        values = np.zeros(feature.shape)
        for quantile in quantiles:
            values += feature >= quantile
        feature = values / float(len(quantiles))
    elif parameters.feature_type == identify_types.ENUM:
        possible_values = parameters.possible_values
        mapping = {}
        for i, possible_value in enumerate(possible_values):
            mapping[possible_value] = i
        output_feature = np.zeros((len(feature), len(possible_values)))
        for i, val in enumerate(feature):
            output_feature[i][mapping[val]] = 1.0
        return output_feature
    else:
        feature = feature - parameters.mean
        feature /= parameters.stddev
    feature *= is_not_empty
    return feature
def run_rndlasso(X, y, alpha, n_resampling=500, sample_fraction=0.1, n_threads=1):
    """ Implement Randomized Lasso in sklearn

    Args:
        X (np.array): scaled X.
        y (pd.df): four columns response table.
        alpha (float): parameter trained from lassoCV
        n_resampling (int): number of times for resampling
        sample_fraction (float): fraction of data to use at each resampling

    Returns:
        np.array: feature importance scores
    """
    logger.info('Implementing Randomized Lasso with alpha={}, n_resampling={} and sample_fraction={}'.
                format(alpha, n_resampling, sample_fraction))
    # generate logit response
    y_logit = logit((y.nMut + 0.5) / (y.length * y.N))
    reg = RandomizedLasso(alpha=alpha,
                          n_resampling=n_resampling,
                          sample_fraction=sample_fraction,
                          selection_threshold=1e-3,
                          max_iter=3000,
                          normalize=False,
                          n_jobs=n_threads)
    rndlasso = reg.fit(X, y_logit)
    fi_scores = rndlasso.scores_
    return fi_scores
def itransform(self, y_transformed):
    yscale = logit(y_transformed)
    return (yscale / self.scale)
def munge_scoreses(scoreses, df):
    npredictors = len(scoreses)
    score_shape = (len(df), npredictors)
    scores = np.empty(score_shape, dtype=np.float32)
    # Yay, nested loops :/
    i = 0
    for (uid, pid) in df[['uid', 'pid']].itertuples(index=False):
        for predictor_ix, pdict in enumerate(scoreses):
            prob = pdict[uid][pid]
            scores[i, predictor_ix] = logit(prob)
        i += 1
    return scores
def vectorize_fold(fold, tags, meta_df, use_metafeats=True):
    with time_me('Loaded pdicts'):
        scoreses = [common.pdict_for_tag(tag, fold) for tag in tags]
    df = meta_df[meta_df['fold'] == fold]
    assert len(df)
    y = df['label']
    n_predictors = len(scoreses)
    with time_me('Munged scores for {} predictors'.format(n_predictors), mode='print'):
        # TODO: could use the logit loading fn added to user_wrapper module
        scores = munge_scoreses(scoreses, df)
    if not use_metafeats:
        X = scores
    else:
        meta_cols = metavectorize.metafeature_columns
        meta = df[meta_cols].values
        # Special f_0 dummy meta feature for learning vanilla weight term per predictor
        metafeats = np.hstack([np.ones((len(df), 1)), meta])
        # (Worth noting that sklearn.preprocessing has a 'PolynomialFeatures' utility
        # that might have been useful here. But this is fine.)
        n_metafeats = metafeats.shape[1]
        logging.info('{} predictors x {} metafeatures -> {} coefs'.format(
            n_predictors, n_metafeats, n_predictors * n_metafeats))
        # X is 'metafeat major'. i.e. the first n_p values for each vector are the
        # raw scores for each predictor, they're followed by each predictor's score
        # multiplied by the first metafeature and so on.
        X = np.tile(scores, n_metafeats) * np.repeat(metafeats, n_predictors, axis=1)
    return X, y
def fit(self, X, s):
    _x = np.ones((X.shape[0], X.shape[1] + 1))
    _x[:, :-1] = X
    self.w, _, _, _ = np.linalg.lstsq(_x, logit(s))
def testGetLogitsAndProbsLogits(self):
    p = np.array([0.01, 0.2, 0.5, 0.7, .99], dtype=np.float32)
    logits = special.logit(p)

    with self.test_session():
        new_logits, new_p = distribution_util.get_logits_and_probs(
            logits=logits, validate_args=True)
        self.assertAllClose(p, new_p.eval())
        self.assertAllClose(logits, new_logits.eval())
def testGetLogitsAndProbsProbability(self):
    p = np.array([0.01, 0.2, 0.5, 0.7, .99], dtype=np.float32)

    with self.test_session():
        new_logits, new_p = distribution_util.get_logits_and_probs(
            probs=p, validate_args=True)
        self.assertAllClose(special.logit(p), new_logits.eval())
        self.assertAllClose(p, new_p.eval())