The following code examples, extracted from open-source Python projects, illustrate how to use scipy.stats.hmean().
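As a quick orientation before the project examples, here is a minimal usage sketch (not taken from any of the projects below; the precision/recall values are purely illustrative): scipy.stats.hmean() takes a sequence of positive numbers and returns their harmonic mean.

    from scipy.stats import hmean

    # Harmonic mean of two positive values, e.g. precision and recall;
    # for exactly two values this equals the F1 score 2*p*r / (p + r).
    precision, recall = 0.8, 0.5
    f1 = hmean([precision, recall])
    print(f1)  # 0.6153846153846154

    # hmean is only defined for positive values; depending on the SciPy
    # version, zeros or negatives raise an error, which is why several
    # examples below guard the call with checks such as `min(x) > 0`.

The harmonic mean penalizes imbalance between its inputs more strongly than the arithmetic mean, which is why it appears so often in the scoring code below.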
import numpy as np
from scipy import stats

def computeF1_macro(confusion_matrix, matching, num_clusters):
    """
    Computes the macro F1 score.
    confusion_matrix: requires permutation
    matching: the order according to which the matrix must be permuted
    """
    ## Permute the matrix columns
    permuted_confusion_matrix = np.zeros([num_clusters, num_clusters])
    for cluster in range(num_clusters):
        matched_cluster = matching[cluster]
        permuted_confusion_matrix[:, cluster] = confusion_matrix[:, matched_cluster]
    ## Compute the F1 score for every cluster
    F1_score = 0
    for cluster in range(num_clusters):
        TP = permuted_confusion_matrix[cluster, cluster]
        FP = np.sum(permuted_confusion_matrix[:, cluster]) - TP
        FN = np.sum(permuted_confusion_matrix[cluster, :]) - TP
        precision = TP / (TP + FP)
        recall = TP / (TP + FN)
        f1 = stats.hmean([precision, recall])
        F1_score += f1
    F1_score /= num_clusters
    return F1_score
def _computer_harmoic_mean_of_probabilities_over_non_zero_in_category_count_terms(
        self, cat_word_counts, p_category_given_word, p_word_given_category, scaler):
    df = pd.DataFrame({
        'cat_word_counts': cat_word_counts,
        'p_word_given_category': p_word_given_category,
        'p_category_given_word': p_category_given_word
    })
    df_with_count = df[df['cat_word_counts'] > 0]
    df_with_count['scale p_word_given_category'] = scaler(df_with_count['p_word_given_category'])
    df_with_count['scale p_category_given_word'] = scaler(df_with_count['p_category_given_word'])
    df['scale p_word_given_category'] = 0
    df.loc[df_with_count.index, 'scale p_word_given_category'] = df_with_count['scale p_word_given_category']
    df['scale p_category_given_word'] = 0
    df.loc[df_with_count.index, 'scale p_category_given_word'] \
        = df_with_count['scale p_category_given_word']
    score = hmean([df_with_count['scale p_category_given_word'],
                   df_with_count['scale p_word_given_category']])
    df['score'] = 0
    df.loc[df_with_count.index, 'score'] = score
    return df['score']
import numpy as np
import pandas as pd
from scipy.stats import hmean

def compute_semeval_score(pearson_score, spearman_score):
    """
    Return NaN if a dataset can't be evaluated on a given frame. Return 0 if
    at least one similarity measure was 0 or negative. Otherwise, take a
    harmonic mean of a Pearson correlation coefficient and a Spearman
    correlation coefficient.
    """
    intervals = ['acc', 'low', 'high']
    scores = []
    for interval in intervals:
        if any(np.isnan(x) for x in [spearman_score[interval], pearson_score[interval]]):
            scores.append(float('NaN'))
        elif any(x <= 0 for x in [spearman_score[interval], pearson_score[interval]]):
            scores.append(0)
        else:
            scores.append(hmean([spearman_score[interval], pearson_score[interval]]))
    return pd.Series(scores, index=intervals)
import math
from scipy import stats

def calcWeightage(Elo_count):
    Rrel = 0.4
    Rran = 1. - Rrel
    hm = stats.hmean(Elo_count) - 10
    elo_confidence = 0.4 * (1 - math.exp(-hm / 2))
    elo_contri = Rran * elo_confidence
    pr_contri = Rran - elo_contri
    return (Rrel, pr_contri, elo_contri)
import numpy as np
from scipy import stats

def computeF1_macro(confusion_matrix, matching, num_clusters):
    """
    Computes the macro F1 score.
    confusion_matrix: requires permutation
    matching: the order according to which the matrix must be permuted
    """
    ## Permute the matrix columns
    permuted_confusion_matrix = np.zeros([num_clusters, num_clusters])
    for cluster in range(num_clusters):
        matched_cluster = matching[cluster]
        permuted_confusion_matrix[:, cluster] = confusion_matrix[:, matched_cluster]
    ## Compute the F1 score for every cluster
    F1_score = 0
    for cluster in range(num_clusters):
        TP = permuted_confusion_matrix[cluster, cluster]
        FP = np.sum(permuted_confusion_matrix[:, cluster]) - TP
        FN = np.sum(permuted_confusion_matrix[cluster, :]) - TP
        precision = TP / (TP + FP)
        recall = TP / (TP + FN)
        f1 = stats.hmean([precision, recall])
        F1_score += f1
    F1_score /= num_clusters
    return F1_score

############
## The basic folder to be created
def check_coarsening_method(methods):
    accepted_methods = ['min', 'max', 'amean', 'hmean', 'gmean', 'median']
    if methods is not None:
        for method in methods:
            if method not in accepted_methods:
                raise ValueError(
                    ' Coarsening method {0} is not implemented..'
                    '\n Use these: {1}'.format(method, accepted_methods)
                )
        return methods
    else:
        return accepted_methods
import numpy as np
from scipy.stats import hmean
from sklearn.metrics import confusion_matrix, f1_score

def my_metrics(y_test, y_pred):
    cm = confusion_matrix(y_test, y_pred)
    # Be careful! Sklearn confusion matrix is very confusing!
    TN = cm[0][0]
    FP = cm[0][1]
    FN = cm[1][0]
    TP = cm[1][1]
    if FN == 0:
        FN = 1
    if TN == 0:
        TN = 1
    # Proportion of those identified as negative that actually are.
    unprecision = TN / (TN + FN)
    # Proportion of those *actually* negative identified as such.
    unrecall = TN / (FP + TN)
    # Get harmonic mean
    unf = hmean([unrecall, unprecision])
    # Get mean of this and the f1 score:
    harmonic_af = np.mean([f1_score(y_test, y_pred), unf])
    return harmonic_af
from scipy.stats import hmean

def optimize_weights(func, *args):
    """
    Both eval_pairwise_analogies() and eval_semeval2012_analogies() have three
    weights that can be tuned (and therefore two free parameters, as the total
    weight does not matter):

    - The *direct weight*, comparing (b2 - a2) to (b1 - a1)
    - The *transpose weight*, comparing (b2 - b1) to (a2 - a1)
    - The *similarity weight*, comparing b2 to b1 and a2 to a1

    This function takes a function for which to optimize the weights as an
    argument and returns the optimal weights, `weight_direct` and
    `weight_transpose`.
    """
    print('Tuning analogy weights')
    weights = [
        0., 0.05, 0.1, 0.15, 0.2, 0.3, 0.35, 0.4, 0.5, 0.6, 0.65, 0.7, 0.8,
        0.9, 1.0, 1.5, 2.0, 2.5, 3.0
    ]
    best_weights = None
    best_acc = 0.
    for weight_direct in weights:
        for weight_transpose in weights:
            scores = func(*args, weight_direct, weight_transpose, subset='dev')
            if isinstance(scores, list):
                # If a function to optimize returns two results, like
                # eval_semeval2012_analogies(), take their harmonic mean to
                # compute the weights optimal for both results
                acc = hmean([scores[0].loc['acc'], scores[1].loc['acc']])
            else:
                acc = scores.loc['acc']

            if acc > best_acc:
                print(weight_direct, weight_transpose, acc)
                best_weights = (weight_direct, weight_transpose)
                best_acc = acc
            elif acc == best_acc:
                print(weight_direct, weight_transpose, acc)

    weight_direct, weight_transpose = best_weights
    print()
    return weight_direct, weight_transpose
def calculate_likelihood(sentence):
    p = 1
    v = list()
    for t, char in enumerate(sentence):
        x = np.zeros((1, t+1, len(chars)))
        t2 = len(sentence) - (t+1)
        x2 = np.zeros((1, t2+1, len(chars)))
        x[0, 0, char_indices['{']] = 1.
        x2[0, t2, char_indices['}']] = 1.
        for i in range(t):
            x[0, i+1, char_indices[sentence[i]]] = 1.
        for i in range(t2):
            x2[0, i, char_indices[sentence[t+i+1]]] = 1.
        preds = model.predict([x, x2], verbose=0)[0]
        #print(x)
        #print(preds)
        #print(char)
        #print("char: %s" % preds[char_indices[char]])
        p = p * preds[char_indices[char]]
        v.append(preds[char_indices[char]])
        #print("agg: %s." % p)

    x = np.zeros((1, len(sentence)+1, len(chars)))
    t2 = len(sentence) - (len(sentence)+1)
    x2 = np.zeros((1, 1, len(chars)))
    x2[0, 0, char_indices[' ']] = 1.
    x[0, 0, char_indices['{']] = 1.
    for i in range(len(sentence)):
        x[0, i+1, char_indices[sentence[i]]] = 1.
    preds = model.predict([x, x2], verbose=0)[0]
    #print(x)
    #print(preds)
    #print('end')
    #print("char: %s" % preds[char_indices['}']])
    p = p * preds[char_indices['}']]
    v.append(preds[char_indices['}']])
    #print("agg: %s" % p)
    #print("avg: %s" % (sum(v)/len(v)))
    #print("min: %s" % min(v))
    #return hmean(v)
    try:
        return min(v)
    except ValueError:
        return 0
def _PerformDataCoarsening(self, Chrom, resolution, coarsening_method):
    """Base method to perform data coarsening.

    This method reads temporary Numpy array files and performs data
    coarsening using the given input method.

    .. warning::
        **Private method**. Use it at your own risk. It is used internally in
        :meth:`WigHandler._StoreInHdf5File`.

    Parameters
    ----------
    Chrom : str
        Chromosome name
    resolution : str
        Resolution in word.
    coarsening_method : str
        Name of method to use for data coarsening. Accepted keywords: min,
        max, median, amean, gmean and hmean.

    """
    output = []
    binsize = util.resolutionToBinsize(resolution)
    size = self.chromSizeInfo[Chrom] + 1

    for i in range(1, size, binsize):
        tmpx = None
        if i + binsize >= size:
            tmpx = self.tmpNumpyArrayFiles.arrays[Chrom][i : size]
        else:
            tmpx = self.tmpNumpyArrayFiles.arrays[Chrom][i : i+binsize]

        int_idx = np.nonzero(tmpx > 0)
        if int_idx[0].shape[0] == 0:
            output.append(0.0)
            continue

        #print(Chrom, tmpx.shape, i, i+binsize, tmpx)
        if coarsening_method == 'max':
            output.append(np.amax(tmpx[int_idx]))
        if coarsening_method == 'min':
            output.append(np.amin(tmpx[int_idx]))
        if coarsening_method == 'amean':
            output.append(np.mean(tmpx[int_idx]))
        if coarsening_method == 'hmean':
            output.append(spstats.hmean(tmpx[int_idx]))
        if coarsening_method == 'gmean':
            output.append(spstats.gmean(tmpx[int_idx]))
        if coarsening_method == 'median':
            output.append(np.median(tmpx[int_idx]))

    # print(Chrom, resolution, coarsening_method, size, binsize, size/binsize, len(output), np.amax(output))
    return np.asarray(output)
def saveAsH5(self, hdf5Out, title=None, resolutions=None, coarsening_methods=None, compression='lzf', keep_original=False): """To convert Wig files to hdf5 file Parameters ---------- hdf5Out : :class:`HDF5Handler` or str Output hdf5 file name or :class:`HDF5Handler` instance title : str Title of the data resolutions : list of str Additional input resolutions other than these default resolutions: 1kb', '2kb', '4kb', '5kb', '8kb', '10kb', '20kb', '40kb', '80kb', '100kb', '160kb','200kb', '320kb', '500kb', '640kb', and '1mb'. For Example: use ``resolutions=['25kb', '50kb', '75kb']`` to add additional 25kb, 50kb and 75kb resolution data. coarsening_methods : list of str Methods to coarse or downsample the data for converting from 1-base to coarser resolutions. Presently, five methods are implemented. * ``'min'`` -> Minimum value * ``'max'`` -> Maximum value * ``'amean'`` -> Arithmetic mean or average * ``'hmean'`` -> Harmonic mean * ``'gmean'`` -> Geometric mean * ``'median'`` -> Median In case of ``None``, all five methods will be considered. User may use only subset of these methods. For example: ``coarse_method=['max', 'amean']`` can be used for downsampling by only these two methods. compression : str data compression method in HDF5 file : ``lzf`` or ``gzip`` method. keep_original : bool Whether original data present in bigwig file should be incorporated in HDF5 file. This will significantly increase size of HDF5 file. """ if not self.isWigParsed: self.parseWig() # Storing data in hdf5 file self._StoreInHdf5File(hdf5Out, title, compression=compression, coarsening_methods=coarsening_methods, resolutions=resolutions, keep_original=keep_original)
def _PerformDataCoarsening(self, Chrom, resolution, coarse_method):
    """Base method to perform data coarsening.

    This method reads temporary Numpy array files and performs data
    coarsening using the given input method.

    .. warning::
        **Private method**. Use it at your own risk. It is used internally in
        :meth:`BEDHandler._StoreInHdf5File`.

    Parameters
    ----------
    Chrom : str
        Chromosome name
    resolution : str
        Resolution in word.
    coarse_method : str
        Name of method to use for data coarsening. Accepted keywords: min,
        max, median, amean, gmean and hmean.

    """
    output = []
    binsize = util.resolutionToBinsize(resolution)
    size = self.chromSizeInfo[Chrom] + 1

    for i in range(1, size, binsize):
        tmpx = None
        if i + binsize >= size:
            tmpx = self.tmpNumpyArrayFiles.arrays[Chrom][i : size]
        else:
            tmpx = self.tmpNumpyArrayFiles.arrays[Chrom][i : i+binsize]

        int_idx = np.nonzero(tmpx > 0)
        if int_idx[0].shape[0] == 0:
            output.append(0.0)
            continue

        #print(Chrom, tmpx.shape, i, i+binsize, tmpx)
        if coarse_method == 'max':
            output.append(np.amax(tmpx[int_idx]))
        if coarse_method == 'min':
            output.append(np.amin(tmpx[int_idx]))
        if coarse_method == 'amean':
            output.append(np.mean(tmpx[int_idx]))
        if coarse_method == 'hmean':
            output.append(spstats.hmean(tmpx[int_idx]))
        if coarse_method == 'gmean':
            output.append(spstats.gmean(tmpx[int_idx]))
        if coarse_method == 'median':
            output.append(np.median(tmpx[int_idx]))

    # print(Chrom, resolution, coarse_method, size, binsize, size/binsize, len(output), np.amax(output))
    return np.asarray(output)
def saveAsH5(self, hdf5Out, title=None, resolutions=None, coarsening_methods=None, compression='lzf', keep_original=False): """To convert bed files to hdf5 file It parses bed files, coarsened the data and store in an input hdf5/h5 file. Parameters ---------- hdf5Out : :class:`HDF5Handler` or str Output hdf5 file name or :class:`HDF5Handler` instance title : str Title of the data resolutions : list of str Additional input resolutions other than these default resolutions: 1kb', '2kb', '4kb', '5kb', '8kb', '10kb', '20kb', '40kb', '80kb', '100kb', '160kb','200kb', '320kb', '500kb', '640kb', and '1mb'. For Example: use ``resolutions=['25kb', '50kb', '75kb']`` to add additional 25kb, 50kb and 75kb resolution data. coarsening_methods : list of str Methods to coarse or downsample the data for converting from 1-base to coarser resolutions. Presently, five methods are implemented. * ``'min'`` -> Minimum value * ``'max'`` -> Maximum value * ``'amean'`` -> Arithmetic mean or average * ``'hmean'`` -> Harmonic mean * ``'gmean'`` -> Geometric mean * ``'median'`` -> Median In case of ``None``, all five methods will be considered. User may use only subset of these methods. For example: ``coarse_method=['max', 'amean']`` can be used for downsampling by only these two methods. compression : str data compression method in HDF5 file : ``lzf`` or ``gzip`` method. keep_original : bool Whether original data present in bigwig file should be incorporated in HDF5 file. This will significantly increase size of HDF5 file. """ if not self.isBedParsed: self.parseBed() # Storing data in hdf5 file self._StoreInHdf5File(hdf5Out, title, resolutions=resolutions, coarsening_methods=coarsening_methods, compression=compression, keep_original=keep_original)
def _build_lexicons(self):
    tdf = (self.term_doc_matrix_.get_term_doc_count_df()
           [[t + ' freq' for t in [self.category_a_, self.category_b_]
             + self.neutral_categories_]])
    tdf = tdf[tdf.sum(axis=1) > 0]
    self._find_a_vs_b_and_b_vs_a(tdf)
    tdf[self.category_a_ + ' scores'] = self.scorer.get_scores(
        tdf[self.category_a_ + ' freq'],
        tdf[[t for t in tdf.columns if t != self.category_a_ + ' freq']].sum(axis=1))
    tdf[self.category_b_ + ' scores'] = self.scorer.get_scores(
        tdf[self.category_b_ + ' freq'],
        tdf[[t for t in tdf.columns if t != self.category_b_ + ' freq']].sum(axis=1))
    tdf[self.category_a_ + ' + ' + self.category_b_ + ' scores'] = tdf[
        [t + ' scores' for t in [self.category_a_, self.category_b_]]].apply(
        lambda x: hmean(x) if min(x) > 0 else 0, axis=1)
    tdf["not " + self.category_a_ + ' scores'] = self.scorer.get_scores(
        tdf[[t for t in tdf.columns if t != self.category_a_ + ' freq']].sum(axis=1),
        tdf[self.category_a_ + ' freq'])
    tdf["not " + self.category_b_ + ' scores'] = self.scorer.get_scores(
        tdf[[t for t in tdf.columns if t != self.category_b_ + ' freq']].sum(axis=1),
        tdf[self.category_b_ + ' freq'])
    tdf["not " + self.category_a_ + ' + ' + self.category_b_ + ' scores'] = tdf[
        ['not ' + t + ' scores' for t in [self.category_a_, self.category_b_]]].apply(
        lambda x: hmean(x) if min(x) > 0 else 0, axis=1)
    self.category_a_words_ = list(tdf.sort_values(
        by=self.category_a_ + ' scores', ascending=False).index)
    self.category_b_words_ = list(tdf.sort_values(
        by=self.category_b_ + ' scores', ascending=False).index)
    self.category_a_and_b_words_ = list(tdf.sort_values(
        by=self.category_a_ + ' + ' + self.category_b_ + ' scores', ascending=False).index)
    self.not_category_a_words_ = list(tdf.sort_values(
        by='not ' + self.category_a_ + ' scores', ascending=False).index)
    self.not_category_b_words_ = list(tdf.sort_values(
        by='not ' + self.category_b_ + ' scores', ascending=False).index)
    self.not_category_a_and_b_words_ = list(tdf.sort_values(
        by='not ' + self.category_a_ + ' + ' + self.category_b_ + ' scores',
        ascending=False).index)