The following 50 code examples, extracted from open-source Python projects, illustrate how to use scipy.stats.mode().
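As a quick orientation before the project examples, here is a minimal sketch of the call itself (our own, not taken from any of the projects below). The `mode(a)[0][0]` indexing that recurs throughout these examples unpacks the modal value on older SciPy versions:

# Minimal usage sketch (not from any project below). On older SciPy,
# stats.mode returns arrays: ModeResult(mode=array([2]), count=array([3])),
# hence the mode(a)[0][0] indexing seen throughout these examples.
# SciPy 1.9 added a keepdims flag, and newer releases return scalars for
# 1-D input by default, where result.mode is already the value itself.
import numpy as np
from scipy import stats

a = np.array([1, 2, 2, 3, 2, 4])
result = stats.mode(a)
print(result.mode, result.count)  # the most common value and its count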
def get_weather_dict(self, data_dir):
    t0 = time()
    filename = '../data_raw/' + data_dir.split('/')[-2] + '_weather.csv.dict.pickle'
    dumpload = DumpLoad(filename)
    if dumpload.isExisiting():
        return dumpload.load()

    resDict = {}
    df = self.load_weatherdf(data_dir)
    for index, row in df.iterrows():
        resDict[row['time_slotid']] = (index, row['weather'], row['temparature'], row['pm25'])
    for name, group in df.groupby('time_date'):
        resDict[name] = (-1, mode(group['weather'])[0][0], mode(group['temparature'])[0][0], mode(group['pm25'])[0][0])

    dumpload.dump(resDict)
    print "dump weather dict:", round(time() - t0, 3), "s"
    return resDict
def find_history_data(self, row, history_dict=None):
    start_district_id = row.iloc[0]
    time_id = row.iloc[1]
    index = ['history_mean', 'history_median', 'history_mode',
             'history_plus_mean', 'history_plus_median', 'history_plus_mode']
    min_list = self.__get_historylist_from_dict(history_dict, start_district_id, time_id)
    plus_list1 = self.__get_historylist_from_dict(history_dict, start_district_id, time_id - 1)
    plus_list2 = self.__get_historylist_from_dict(history_dict, start_district_id, time_id - 2)
    plus_list = np.array(plus_list1 + plus_list2 + min_list)
    min_list = np.array(min_list)
    res = pd.Series([min_list.mean(), np.median(min_list), mode(min_list)[0][0],
                     plus_list.mean(), np.median(plus_list), mode(plus_list)[0][0]],
                    index=index)
    return res
def predict_proba(self, X):
    '''
    Return confidences (i.e., p(y_j|x)).

    (In the multi-dimensional output case, this should be an N x L x K
    array, but @NOTE/@TODO: this is not the case at the moment! At the
    moment it is N x L x 2. For example, in
        [[ 0.   1. ]
         [ 0.   0.9]
         [ 0.   1. ]
         [ 0.   1. ]
         [ 0.   1. ]
         [ 1.   0.9]]
    y_j=6 with probability 0.9.)
    '''
    N, D = X.shape
    Y = zeros((N, self.L, 2))
    for i in range(N):
        V = zeros((self.M, self.L))
        for m in range(self.M):
            V[m, :] = self.h[m].predict(array([X[i, :]]))
        k = mode(V)[0]
        Y[i, :, 0] = k
        Y[i, :, 1] = sum(V == k, axis=0) / self.M
    return Y
def predict(self, covtest):
    """Get the predictions.

    Parameters
    ----------
    covtest : ndarray, shape (n_trials, n_channels, n_channels)
        ndarray of SPD matrices.

    Returns
    -------
    pred : ndarray of int, shape (n_trials, 1)
        The prediction for each trial according to the closest centroid.
    """
    dist = self._predict_distances(covtest)
    neighbors_classes = self.classes_[numpy.argsort(dist)]
    out, _ = stats.mode(neighbors_classes[:, 0:self.n_neighbors], axis=1)
    return out.ravel()
def predict(self, X):
    """Predict the class labels for the provided data.

    Parameters
    ----------
    X : array-like, shape (n_ts, sz, d)
        Test samples.
    """
    X_ = to_time_series_dataset(X)
    neigh_dist, neigh_ind = self.kneighbors(X_)
    weights = _get_weights(neigh_dist, self.weights)
    if weights is None:
        mode, _ = stats.mode(self._fit_y[neigh_ind], axis=1)
    else:
        mode, _ = weighted_mode(self._fit_y[neigh_ind], weights, axis=1)
    return mode[:, 0]
def mode(data=None):
    """
    Compute the mode of a given numpy array or pandas series.

    This is just a wrapper around scipy.stats.mode which returns the mode
    of a given numpy array or pandas series. Missing values are omitted
    before the mode is computed.

    Args:
        data: A numpy array or pandas series.

    Returns:
        The mode of data as a scalar value.

    Raises:
        ValueError: If no data is specified or if all values are missing.
    """
    if data is None:
        raise ValueError('No data specified.')
    if not len(data.dropna()):
        raise ValueError('No valid data specified.')
    mode_val = stats.mode(data.dropna(), nan_policy='omit')[0]
    return mode_val[0]
def __init__(self, model_param):
    self.interface_layer = model_param['interface_layer']
    self.middle_layers = model_param['middle_layers']
    self.cost_layer = model_param['cost_layer']
    self.last_n = model_param['last_n']
    self.outputs = model_param.get('outputs', None)
    self.errors = model_param.get('errors', None)
    self.name = model_param["name"]
    self.problem_type = model_param["problem_type"]
    self.mode = "train"
    self.param = []
    for layer in self.middle_layers:
        self.param += layer.param
    self.param += self.cost_layer.param
    self.set_mode(self.mode)
    self.grad = self.get_grad()
    self.cost_func = self.get_cost_func()
    self.output_func_dict = self.get_output_func_dict()
    self.error_func_dict = self.get_error_func_dict()
def query(self, points):
    """
    @summary: Estimate a set of test points given the model we built.
    @param points: should be a numpy array with each row corresponding to a specific query.
    @returns the estimated values according to the saved model.
    """
    pred = np.zeros(points.shape[0])  # initialize prediction vector
    for i in range(0, points.shape[0]):  # iterate over each test example
        sqDist = np.zeros(np.shape(self.dataY))  # initialize squared distances vector
        for j in range(0, self.dataX.shape[1]):
            sqDist[:, 0] += (points[i, j] - self.dataX[:, j]) ** 2
        sqDist = np.concatenate((sqDist, self.dataY), axis=1)
        sqDist = np.asarray(sorted(sqDist, key=lambda x: x[0]))
        # classify: calculate mode & no. of counts of modal value
        pred[i], binCounts = stats.mode(sqDist[0:self.k, 1])
    return pred
def query(self, testX):
    """
    @summary: Add test data to query individual learners in BagLearner
    @param testX: ndarray, X test data with examples in rows & features in columns
    Returns pred: 1D array, the predicted labels
    """
    # initialize pred: no. of rows = no. of test examples,
    # no. of columns = no. of individual learners
    pred = np.empty((testX.shape[0], self.bags))
    for col in range(pred.shape[1]):
        # predictions for each learner go in the columns of pred
        pred[:, col] = self.learnerList[col].query(testX)
    # mode and number of counts along columns (i.e. over all learners),
    # returned as column vectors
    modeValue, binCount = stats.mode(pred, axis=1)
    return modeValue[:, 0]  # return the mode over all learners as a 1D array
def ensure_no_stuckbits(F, args, fn):
    bias = re.split('[\[ \] \: \,]', F[0].header['BIASSEC'])[1:-1]
    biassec = [int(t) - ((i + 1) % 2) for i, t in enumerate(bias)]
    data = F[0].data[biassec[2]:biassec[3], biassec[0]:biassec[1]]
    mode_value = int(mode(data.ravel()).mode)
    missing_values = []
    for i in np.arange(mode_value - 8, mode_value + 9):
        if (data == i).sum() == 0:
            missing_values.append(i)
    for missing in missing_values:
        args.log.warning('The value %i is not represented '
                         'in the overscan region for %s' % (missing, fn))
    if len(missing_values):
        return False
    else:
        return True
def test_random_weights():
    # set this up so that each row should have a weighted mode of 6,
    # with a score that is easily reproduced
    mode_result = 6
    rng = np.random.RandomState(0)
    x = rng.randint(mode_result, size=(100, 10))
    w = rng.random_sample(x.shape)
    x[:, :5] = mode_result
    w[:, :5] += 1
    mode, score = weighted_mode(x, w, axis=1)
    assert_array_equal(mode, mode_result)
    assert_array_almost_equal(score.ravel(), w[:, :5].sum(1))
def getNDValue(self):
    """
    Get value of not defined depth value distances
    :return: value of not defined depth value
    """
    if self.dpt[self.dpt < self.minDepth].shape[0] > self.dpt[self.dpt > self.maxDepth].shape[0]:
        return stats.mode(self.dpt[self.dpt < self.minDepth])[0][0]
    else:
        return stats.mode(self.dpt[self.dpt > self.maxDepth])[0][0]
def getCrop(self, dpt, xstart, xend, ystart, yend, zstart, zend, thresh_z=True):
    """
    Crop patch from image
    :param dpt: depth image to crop from
    :param xstart: start x
    :param xend: end x
    :param ystart: start y
    :param yend: end y
    :param zstart: start z
    :param zend: end z
    :param thresh_z: threshold z values
    :return: cropped image
    """
    if len(dpt.shape) == 2:
        cropped = dpt[max(ystart, 0):min(yend, dpt.shape[0]),
                      max(xstart, 0):min(xend, dpt.shape[1])].copy()
        # add pixels that are out of the image in order to keep aspect ratio
        cropped = numpy.pad(cropped,
                            ((abs(ystart) - max(ystart, 0), abs(yend) - min(yend, dpt.shape[0])),
                             (abs(xstart) - max(xstart, 0), abs(xend) - min(xend, dpt.shape[1]))),
                            mode='constant', constant_values=0)
    elif len(dpt.shape) == 3:
        cropped = dpt[max(ystart, 0):min(yend, dpt.shape[0]),
                      max(xstart, 0):min(xend, dpt.shape[1]), :].copy()
        # add pixels that are out of the image in order to keep aspect ratio
        cropped = numpy.pad(cropped,
                            ((abs(ystart) - max(ystart, 0), abs(yend) - min(yend, dpt.shape[0])),
                             (abs(xstart) - max(xstart, 0), abs(xend) - min(xend, dpt.shape[1])),
                             (0, 0)),
                            mode='constant', constant_values=0)
    else:
        raise NotImplementedError()

    if thresh_z is True:
        msk1 = numpy.bitwise_and(cropped < zstart, cropped != 0)
        msk2 = numpy.bitwise_and(cropped > zend, cropped != 0)
        cropped[msk1] = zstart
        cropped[msk2] = 0.  # backface is at 0, it is set later
    return cropped
def __init__(self, data_block):
    # check if data element is of class datablock
    self.check_datatype(data_block, 'data_block', DataBlock)
    self.datablock = data_block  # though redundant, this makes the code more readable
    self.target = self.datablock.target
    # get tuple of available data
    self.dp = self.datablock.data_present().values()
def voting(self, y_true, pred):
    if y_true.shape[0] != pred.shape[0]:
        raise ValueError('Both arrays should have the same size!')

    # split the arrays in songs
    arr_size = y_true.shape[0]
    pred = np.split(pred, arr_size / self.augment_factor)
    y_true = np.split(y_true, arr_size / self.augment_factor)

    # empty answers
    voting_truth = []
    voting_ans = []
    for x, y in zip(y_true, pred):
        voting_truth.append(mode(x)[0][0])
        voting_ans.append(mode(y)[0][0])
    return np.array(voting_truth), np.array(voting_ans)

# @Class: MusicDataGenerator
# @Description:
#     featurewise_center: set input mean to 0 over the dataset.
#     samplewise_center: set each sample mean to 0.
#     featurewise_std_normalization: divide inputs by std of the dataset.
#     samplewise_std_normalization: divide each input by its std.
#     zca_whitening: apply ZCA whitening.
def predict(self, X):
    '''
    Return predictions for X (multi-dimensionally speaking, i.e., we
    return the mode).
    '''
    N, D = X.shape
    Y = zeros((N, self.L))
    for i in range(N):
        V = zeros((self.M, self.L))
        for m in range(self.M):
            V[m, :] = self.h[m].predict(array([X[i, :]]))
        Y[i, :] = mode(V)[0]
    return Y
def _most_frequent(array, extra_value, n_repeat):
    """Compute the most frequent value in a 1d array extended with
    [extra_value] * n_repeat, where extra_value is assumed to be not part
    of the array."""
    # Compute the most frequent value in array only
    if array.size > 0:
        mode = stats.mode(array)
        most_frequent_value = mode[0][0]
        most_frequent_count = mode[1][0]
    else:
        most_frequent_value = 0
        most_frequent_count = 0

    # Compare to array + [extra_value] * n_repeat
    if most_frequent_count == 0 and n_repeat == 0:
        return np.nan
    elif most_frequent_count < n_repeat:
        return extra_value
    elif most_frequent_count > n_repeat:
        return most_frequent_value
    elif most_frequent_count == n_repeat:
        # Tie break: copy the behaviour of scipy.stats.mode
        if most_frequent_value < extra_value:
            return most_frequent_value
        else:
            return extra_value
def summarize(self, x, summary_func, missing_data_cond, in_place=False):
    """Substitutes missing values with a statistical summary of each
    feature vector.

    Parameters
    ----------
    x : numpy.array
        Assumes that each feature column is of single type. Converts digit
        string features to float.
    summary_func : function
        Summarization function to be used for imputation (mean, median,
        mode, max, min...)
    missing_data_cond : function
        Method that takes one value and returns True if it represents
        missing data or false otherwise.
    """
    if in_place:
        data = x
    else:
        data = np.copy(x)

    # replace missing values with the summarization function
    for col in xrange(x.shape[1]):
        nan_ids = missing_data_cond(x[:, col])
        if True in nan_ids:
            val = summary_func(x[~nan_ids, col])
            data[nan_ids, col] = val
    return data
def build_tree(self, dataX, dataY):
    if self.verbose: print "build_tree", self.leaf_size
    if self.verbose: print "data shape", dataX.shape
    # if no elements in subtree, return empty subtree
    if dataX.shape[0] == 0:
        return np.array([])
    # if there is only 1 item left or if fewer than leaf size, return mode of data
    if dataX.ndim == 1 or dataX.shape[0] <= self.leaf_size:
        return np.array([-1, stats.mode(dataY).mode[0], -1, -1])
    # if all of the data has the same value, return that value
    # if not np.all(dataY - dataY[0]):
    #     print 'all same'
    #     return np.array([-1, dataY[0], -1, -1])
    else:
        if self.verbose: print "passed conditions"
        i = np.random.randint(dataX.shape[1] - 1)
        d = np.random.randint(dataX.shape[0], size=2)
        for j in range(11):
            if dataX[d[0], i] != dataX[d[1], i]:
                break
            else:
                d[1] = np.random.randint(dataX.shape[0])
        if j == 10:
            return np.array([-1, dataY[d[0]], -1, -1])
        splitVal = (dataX[d[0], i] + dataX[d[1], i]) / 2.0
        indices = dataX[:, i] <= splitVal
        leftTree = self.build_tree(dataX[indices, :], dataY[indices])
        indices = dataX[:, i] > splitVal
        rightTree = self.build_tree(dataX[indices, :], dataY[indices])
        leftTreeSize = leftTree.shape[0] if leftTree.ndim != 1 else 1
        if leftTree.shape[0] == 0 or rightTree.shape[0] == 0:
            leftTreeSize = 0
        root = [i, splitVal, 1, leftTreeSize + 1]
        if (leftTree.shape[0] != 0):
            root = np.vstack((root, leftTree))
        if (rightTree.shape[0] != 0):
            root = np.vstack((root, rightTree))
        return np.array(root)
def _most_common(img):
    """ Subtract the most common value from the whole image """
    common = mode(img, axis=None).mode[0]
    res = img - common
    res[res < 0] = 0
    return res
def compute_class_averages(self):
    """
    Computes the class average of each node in the tree. The class average
    is the mode of the training data that partitions to the node.
    """
    for i in range(2, self.nodes + 1):
        parent = self.graph.predecessors(i)[0]
        if self.graph.node[parent]['cutoff'] is None:
            self.graph.node[i]['classval'] = self.graph.node[parent]['classval']
        else:
            node_indices = self.partition_data(i)
            classval = mode(self.y[node_indices]).mode[0]
            self.graph.node[i]['classval'] = classval
def predict(self, x, k=1, model='regression'):
    """
    Note: currently only works on a single vector and not matrices.

    Args:
        x (np.ndarray): Training data of shape [1, n_features]
        k (int): number of nearest neighbors to consider
        model: {'regression', 'classification'}
            K nearest neighbor classification or regression. The choice
            most likely depends on the type of data the model was fit with.

    Returns:
        float: Returns predicted value

    Raises:
        NameError: If model has not been fit.
    """
    if not self.learned:
        raise NameError('Fit model first')
    distances = np.array([])
    for row in range(np.shape(self.samples)[0]):
        # add distance from x to sample row to distances vector
        distances = np.append(distances, np.linalg.norm(x - self.samples[row, :]))
    nearestneighbors = distances.argsort()[:k]
    if model == 'regression':
        prediction = self.values[nearestneighbors].mean()
    if model == 'classification':
        prediction = stats.mode(self.values[nearestneighbors]).mode
    return prediction
def predict(self, estimator_args, with_prob=False):
    if self.voting == 'hard':
        # sub_res -> (estimator_dim, batch_dim)
        sub_res = np.array([estimator.predict_func(*estimator_args)
                            for estimator in self.estimators],
                           dtype=theano.config.floatX)
        mode_res, count = mode(sub_res, axis=0)
        return (mode_res[0], count[0] / self.n_estimators) if with_prob else mode_res[0]
    else:
        # sub_res -> (estimator_dim, batch_dim, target_dim)
        sub_res = np.array([estimator.predict_prob_func(*estimator_args)
                            for estimator in self.estimators],
                           dtype=theano.config.floatX)
        sub_res = sub_res.mean(axis=0)
        max_res = np.argmax(sub_res, axis=1)
        mean_prob = sub_res[np.arange(sub_res.shape[0]), max_res]
        return (max_res, mean_prob) if with_prob else max_res
def predict_sent(self, sent, with_prob=False):
    if self.voting == 'hard':
        # sub_res -> (estimator_dim, )
        sub_res = np.array([estimator.predict_sent(sent)
                            for estimator in self.estimators], dtype=np.float32)
        mode_res, count = mode(sub_res)
        return (mode_res[0], count[0] / self.n_estimators) if with_prob else mode_res[0]
    else:
        # sub_res -> (estimator_dim, target_dim)
        sub_res = np.array([estimator.predict_sent(sent, with_prob=True)
                            for estimator in self.estimators], dtype=np.float32)
        sub_res = sub_res.mean(axis=0)
        max_res = np.argmax(sub_res)
        mean_prob = sub_res[max_res]
        return (max_res, mean_prob) if with_prob else max_res
def predict_emotion(self, feature):
    """
    Get arousal and valence class from a feature vector.
    Input: Feature (standard deviation and mean) from all frequency bands
           and channels with dimension 1 x M (number of features).
    Output: Class of emotion between 1 and 3 for each of arousal and
            valence. 1 denotes the low category, 2 the normal category,
            and 3 the high category.
    """
    # compute Canberra distance to the arousal training data
    distance_ar = map(lambda x: ss.distance.canberra(x, feature), self.train_arousal)
    # compute Canberra distance to the valence training data
    distance_va = map(lambda x: ss.distance.canberra(x, feature), self.train_valence)

    # compute the 3 nearest indices and distance values for arousal
    idx_nearest_ar = np.array(np.argsort(distance_ar)[:3])
    val_nearest_ar = np.array(np.sort(distance_ar)[:3])

    # compute the 3 nearest indices and distance values for valence
    idx_nearest_va = np.array(np.argsort(distance_va)[:3])
    val_nearest_va = np.array(np.sort(distance_va)[:3])

    # Compare the first and second nearest distances. If their ratio is
    # less than or equal to 0.97, take the class of the first nearest
    # distance; otherwise take the most frequent class.
    # arousal
    comp_ar = val_nearest_ar[0] / val_nearest_ar[1]
    if comp_ar <= 0.97:
        result_ar = self.class_arousal[0, idx_nearest_ar[0]]
    else:
        result_ar = sst.mode(self.class_arousal[0, idx_nearest_ar])
        result_ar = float(result_ar[0])

    # valence
    comp_va = val_nearest_va[0] / val_nearest_va[1]
    if comp_va <= 0.97:
        result_va = self.class_valence[0, idx_nearest_va[0]]
    else:
        result_va = sst.mode(self.class_valence[0, idx_nearest_va])
        result_va = float(result_va[0])

    return result_ar, result_va
def getCrop(self, dpt, xstart, xend, ystart, yend, zstart, zend, thresh_z=True, background=0):
    """
    Crop patch from image
    :param dpt: depth image to crop from
    :param xstart: start x
    :param xend: end x
    :param ystart: start y
    :param yend: end y
    :param zstart: start z
    :param zend: end z
    :param thresh_z: threshold z values
    :param background: fill value for padded pixels outside the image
    :return: cropped image
    """
    if len(dpt.shape) == 2:
        cropped = dpt[max(ystart, 0):min(yend, dpt.shape[0]),
                      max(xstart, 0):min(xend, dpt.shape[1])].copy()
        # add pixels that are out of the image in order to keep aspect ratio
        cropped = numpy.pad(cropped,
                            ((abs(ystart) - max(ystart, 0), abs(yend) - min(yend, dpt.shape[0])),
                             (abs(xstart) - max(xstart, 0), abs(xend) - min(xend, dpt.shape[1]))),
                            mode='constant', constant_values=background)
    elif len(dpt.shape) == 3:
        cropped = dpt[max(ystart, 0):min(yend, dpt.shape[0]),
                      max(xstart, 0):min(xend, dpt.shape[1]), :].copy()
        # add pixels that are out of the image in order to keep aspect ratio
        cropped = numpy.pad(cropped,
                            ((abs(ystart) - max(ystart, 0), abs(yend) - min(yend, dpt.shape[0])),
                             (abs(xstart) - max(xstart, 0), abs(xend) - min(xend, dpt.shape[1])),
                             (0, 0)),
                            mode='constant', constant_values=background)
    else:
        raise NotImplementedError()

    if thresh_z is True:
        msk1 = numpy.logical_and(cropped < zstart, cropped != 0)
        msk2 = numpy.logical_and(cropped > zend, cropped != 0)
        cropped[msk1] = zstart
        cropped[msk2] = 0.  # backface is at 0, it is set later
    return cropped
def majority_vote(votes):
    mode_result = mode(votes, axis=0)
    return mode_result.mode[0]
def mode(x, axis=None, keepdims=False):
    from scipy.stats import mode as sp_mode
    mode_x, _ = sp_mode(x, axis=axis)
    if not keepdims:
        mode_x = np.take(mode_x, 0, axis=axis)
    return mode_x
def set_mode(self, mode):
    self.mode = mode
    for layer in self.middle_layers:
        layer.set_mode(mode)
    self.cost_layer.set_mode(mode)
def get_cost(self, data_iterator):
    ret = 0
    old_mode = self.mode
    self.set_mode('predict')
    data_iterator.begin(do_shuffle=False)
    while True:
        ret += self.cost_func(*(data_iterator.get_batch()))
        data_iterator.next()
        if data_iterator.no_batch_left():
            break
    self.set_mode(old_mode)
    return ret / data_iterator.total()
def get_error_dict(self, data_iterator):
    if len(self.error_func_dict) > 0:
        l = {}
        for key in self.error_func_dict:
            ret = 0
            old_mode = self.mode
            self.set_mode('predict')
            data_iterator.begin(do_shuffle=False)
            while True:
                ret += self.error_func_dict[key](*(data_iterator.get_batch()))
                data_iterator.next()
                if data_iterator.no_batch_left():
                    break
            self.set_mode(old_mode)
            l[key] = ret / data_iterator.total()
        return l
    # else: disabled, since it only applies to binary predictions
    # error = 0
    # old_mode = self.mode
    # self.set_mode('predict')
    # data_iterator.begin(do_shuffle=False)
    # while True:
    #     output = self.output_func_dict[0](*data_iterator.input_batch())
    #     target = data_iterator.output_batch()[0]
    #     pred = output.reshape((output.shape[0])) > 0.5
    #     target = target.reshape(target.shape[0]).astype("bool")
    #     error += (pred == target).sum()
    #     data_iterator.next()
    #     if data_iterator.no_batch_left():
    #         break
    # error = 1 - (error / numpy_floatX(data_iterator.total()))
    # self.set_mode(old_mode)
    # return [error]
def get_cost(self, data_iterator):
    ret = 0
    old_mode = self.mode
    self.set_mode('predict')
    data_iterator.begin(do_shuffle=False)
    while True:
        ret += self.cost_func(*(data_iterator.get_batch()))
        data_iterator.next()
        if data_iterator.no_batch_left():
            break
    self.set_mode(old_mode)
    return ret / (data_iterator.total() * data_iterator.num_segments)
def get_error_dict(self, data_iterator):
    if len(self.error_func_dict) > 0:
        l = {}
        for key in self.error_func_dict:
            ret = 0
            old_mode = self.mode
            self.set_mode('predict')
            data_iterator.begin(do_shuffle=False)
            while True:
                ret += self.error_func_dict[key](*(data_iterator.get_batch()))
                data_iterator.next()
                if data_iterator.no_batch_left():
                    break
            self.set_mode(old_mode)
            l[key] = ret / (data_iterator.total() * data_iterator.num_segments)
        return l
    # else: disabled, since it only applies to binary predictions
    # error = 0
    # old_mode = self.mode
    # self.set_mode('predict')
    # data_iterator.begin(do_shuffle=False)
    # while True:
    #     output = self.output_func_dict[0](*data_iterator.input_batch())
    #     target = data_iterator.output_batch()[0]
    #     pred = output.reshape((output.shape[0])) > 0.5
    #     target = target.reshape(target.shape[0]).astype("bool")
    #     error += (pred == target).sum()
    #     data_iterator.next()
    #     if data_iterator.no_batch_left():
    #         break
    # error = 1 - (error / numpy_floatX(data_iterator.total() * data_iterator.num_segments))
    # self.set_mode(old_mode)
    # return [error]
def json_graph(model, categories=None, scales=None):
    json_dict = {}

    # build links json representation
    json_dict["links"] = []
    for s_node, t_node, n_common in model.links_:
        link_dict = {"source": str(s_node),
                     "target": str(t_node),
                     "n_common": n_common}
        json_dict["links"].append(link_dict)

    # node json representation
    json_dict["nodes"] = []
    for (p_n, p) in enumerate(model.nodes_):
        for (c_n, c) in enumerate(p):
            node_dict = {"id": str((p_n, c_n)),
                         "int_id": model.nodes_to_int_[(p_n, c_n)],
                         "color": 1,
                         "n_members": c.shape[0]}
            if categories is not None:
                for name, arr in categories.items():
                    node_dict[name] = int(mode(arr[c], axis=None)[0])
            if scales is not None:
                for name, arr in scales.items():
                    node_dict[name] = np.mean(arr[c], axis=None)
            json_dict["nodes"].append(node_dict)

    # list of categories and scales
    e_name = "categories_and_scales"
    json_dict[e_name] = []
    for c in (["int_id"] + (list(categories.keys()) if categories is not None else [])):
        json_dict[e_name].append({"name": c, "type": "category"})
    for s in (list(scales.keys()) if scales is not None else []):
        json_dict[e_name].append({"name": s, "type": "scale"})
    return json.dumps(json_dict, indent=4)
def test_uniform_weights():
    # with uniform weights, results should be identical to stats.mode
    rng = np.random.RandomState(0)
    x = rng.randint(10, size=(10, 5))
    weights = np.ones(x.shape)
    for axis in (None, 0, 1):
        mode, score = stats.mode(x, axis)
        mode2, score2 = weighted_mode(x, weights, axis)
        assert_true(np.all(mode == mode2))
        assert_true(np.all(score == score2))
def transform(self, y, replace_unique=False):
    """Transform labels to normalized encoding.

    Parameters
    ----------
    y : array-like of shape [n_samples]
        Target values.

    Returns
    -------
    y : array-like of shape [n_samples]
    """
    self._check_fitted()
    classes = np.unique(y)
    _check_numpy_unicode_bug(classes)
    if len(np.intersect1d(classes, self.classes_)) < len(classes):
        diff = np.setdiff1d(classes, self.classes_)
        if replace_unique:
            mode = stats.mode(y)[0][0]
            for i in xrange(0, len(y)):
                if y[i] in diff:
                    y[i] = mode
        else:
            raise ValueError("y contains new labels: %s" % str(diff))
    return np.searchsorted(self.classes_, y)
def mode_function(df):
    counts = mode(df)
    return counts[0][0]
def freq_from_autocorr(signal, sampling_rate):
    corr = fftconvolve(signal, signal[::-1], mode='full')
    corr = corr[len(corr) // 2:]
    d = np.diff(corr)
    start = find_index_by_true(d > 0)[0]
    peak = np.argmax(corr[start:]) + start
    px, py = parabolic(corr, peak)
    return sampling_rate / px
def predict(self, X): """ Predict values using the model Parameters ---------- X : {array-like, sparse matrix} of shape [n_samples, n_features] Returns ------- C : numpy array of shape [n_samples, n_outputs] Predicted values. """ dim = len(self._classifiers) ensemble_output = np.zeros((len(X),dim)) # Z-score X = (X-self._med)/(self._std+self._noise) for i in range(0,dim): xrot_z = X.dot(self._inforotar[i]) ensemble_output[:,i] = self._classifiers[i].predict(xrot_z) y_pred = mode(ensemble_output, axis=1)[0] return y_pred
def pr_object(detect, truth, overlap=10):
    # we assume that both truth and detect volumes are separate objects
    from scipy import stats

    # TODO: 64-bit support
    # manual relabel (could be slow!)
    utruth = np.unique(truth)
    utruth = utruth[utruth > 0]
    udetect = np.unique(detect)
    udetect = udetect[udetect > 0]

    tp = 0.0
    fp = 0.0
    fn = 0.0

    # TODO: removing only greatest match
    # for each truth object find a detection
    for t in utruth:  # background is ignored
        match = detect[truth == t]
        match = match[match > 0]  # get rid of spurious values
        match = stats.mode(match)
        if match[1] >= overlap:
            tp += 1
            # any detected objects can only be used once, so remove them here.
            # detect = mahotas.labeled.remove_regions(detect, match[0])
            detect[detect == match[0]] = 0
        else:
            fn += 1

    # detect_left, fp = mahotas.labeled.relabel(detect)
    fp = np.unique(detect)
    fp = fp[fp > 0]
    fp = len(fp)

    precision = 0
    recall = 0
    if tp + fp > 0:
        precision = tp / (tp + fp)
    if tp + fn > 0:
        recall = tp / (tp + fn)
    if (precision == 0) or (recall == 0):
        f1 = 0
    else:
        f1 = (2 * precision * recall) / (precision + recall)

    print(precision)
    print(recall)
    print(f1)
    return precision, recall, f1
def predict(self, X): """Predict the class labels for the provided data Parameters ---------- X : array-like, shape (n_query, n_features). Test samples. Returns ------- y : array of shape [n_samples] Class labels for each data sample. """ # TODO: Make classification of multiple samples a bit more effective... if X.ndim > 1 and X.shape[1] != 1: out = [] for x in X: out += self.predict(x) return out X = X.flatten() if self.metric == 'minkowski': dists = np.sum(np.abs(self._data - X) ** self.p, axis=1) else: # TODO: Implement other metrics. raise ValueError("Only Minkowski distance metric implemented...") argument = np.argsort(dists) labels = self._labels[argument[:self.n_neighbors]] if self.weights == 'distance': weights = 1 / dists[argument[:self.n_neighbors]] out = np.zeros((len(self._classes), ), 'float') for i, c in enumerate(self._classes): out[i] = np.sum(weights[labels == c]) out /= np.sum(out) y_pred = self._labels[np.argmax(out)] else: y_pred, _ = mode(labels) return y_pred.tolist()
def _dense_fit(self, X, strategy, missing_values, axis):
    """Fit the transformer on dense data."""
    X = check_array(X, force_all_finite=False)
    mask = _get_mask(X, missing_values)
    masked_X = ma.masked_array(X, mask=mask)

    # Mean
    if strategy == "mean":
        mean_masked = np.ma.mean(masked_X, axis=axis)
        # Avoid the warning "Warning: converting a masked element to nan."
        mean = np.ma.getdata(mean_masked)
        mean[np.ma.getmask(mean_masked)] = np.nan
        return mean

    # Median
    elif strategy == "median":
        if tuple(int(v) for v in np.__version__.split('.')[:2]) < (1, 5):
            # In old versions of numpy, calling a median on an array
            # containing nans returns nan. This is different in
            # recent versions of numpy, which we want to mimic
            masked_X.mask = np.logical_or(masked_X.mask, np.isnan(X))
        median_masked = np.ma.median(masked_X, axis=axis)
        # Avoid the warning "Warning: converting a masked element to nan."
        median = np.ma.getdata(median_masked)
        median[np.ma.getmaskarray(median_masked)] = np.nan
        return median

    # Most frequent
    elif strategy == "most_frequent":
        # scipy.stats.mstats.mode cannot be used because it will not work
        # properly if the first element is masked and its frequency
        # is equal to the frequency of the most frequent valid element.
        # See https://github.com/scipy/scipy/issues/2636
        # To be able to access the elements by columns
        if axis == 0:
            X = X.transpose()
            mask = mask.transpose()
        most_frequent = np.empty(X.shape[0])
        for i, (row, row_mask) in enumerate(zip(X[:], mask[:])):
            row_mask = np.logical_not(row_mask).astype(np.bool)
            row = row[row_mask]
            most_frequent[i] = _most_frequent(row, np.nan, 0)
        return most_frequent
def predict(model, training_cnf, predict_dir, weights_from, dataset_name, convert, image_size, sync, predict_type):
    images = data.get_image_files(predict_dir)

    # For now, hard-coded models, cnfs, and weights.
    # Need to take these from program inputs or an ensembling config file.
    print('Creating predictor 1')
    weights_from1 = 'weights.sa/model-epoch-97.ckpt'
    model1 = 'examples/mnist_model_sa.py'
    training_cnf1 = 'examples/mnist_cnf.py'
    model_def1 = util.load_module(model1)
    model1 = model_def1.model
    cnf1 = util.load_module(training_cnf1).cnf
    standardizer = cnf1.get('standardizer', NoOpStandardizer())
    preprocessor = convert_preprocessor(model_def1.image_size[0]) if convert else None
    prediction_iterator1 = create_prediction_iter(cnf1, standardizer, model_def1.crop_size, preprocessor, sync)
    # predictor1 = QuasiCropPredictor(model1, cnf1, weights_from1, prediction_iterator1, 20)
    predictor1 = OneCropPredictor(model1, cnf1, weights_from1, prediction_iterator1)

    print('Creating predictor 2')
    weights_from2 = 'weights.rv/model-epoch-31.ckpt'
    model2 = 'examples/mnist_model.py'
    training_cnf2 = 'examples/mnist_cnf.py'
    model_def2 = util.load_module(model2)
    model2 = model_def2.model
    cnf2 = util.load_module(training_cnf2).cnf
    standardizer = cnf2.get('standardizer', NoOpStandardizer())
    preprocessor = convert_preprocessor(model_def2.image_size[0]) if convert else None
    prediction_iterator2 = create_prediction_iter(cnf2, standardizer, model_def2.crop_size, preprocessor, sync)
    # predictor2 = QuasiCropPredictor(model2, cnf2, weights_from2, prediction_iterator2, 20)
    predictor2 = OneCropPredictor(model2, cnf2, weights_from2, prediction_iterator2)

    predictor = EnsemblePredictor([predictor1, predictor2])

    def softmax_result_to_vote(predictions):
        return predictions.argmax(axis=1)

    def vote_combiner(votes):
        return mode(votes, axis=0)[0].reshape(-1)

    class_predictions = predictor.predict_with_voting(
        images,
        [softmax_result_to_vote, softmax_result_to_vote],
        vote_combiner
    )

    if not os.path.exists(os.path.join(predict_dir, '..', 'results')):
        os.mkdir(os.path.join(predict_dir, '..', 'results'))
    if not os.path.exists(os.path.join(predict_dir, '..', 'results', dataset_name)):
        os.mkdir(os.path.join(predict_dir, '..', 'results', dataset_name))

    names = data.get_names(images)
    image_class_predictions = np.column_stack([names, class_predictions])
    title = np.array(['image', 'label'])
    image_class_predictions = np.vstack([title, image_class_predictions])
    prediction_class_file = os.path.abspath(
        os.path.join(predict_dir, '..', 'results', dataset_name, 'predictions_class.csv'))
    np.savetxt(prediction_class_file, image_class_predictions, delimiter=",", fmt="%s")
    print('Class predictions saved to: %s' % prediction_class_file)