我们从Python开源项目中,提取了以下15个代码示例,用于说明如何使用pandas.Series()。
def regression(nx, ny, ex=None):
    """Fit an ordinary least-squares line to (nx, ny) and evaluate it.

    Parameters
    ==========
    nx : array
        x values of the node sub-spectrum to fit
    ny : array
        y values of the node sub-spectrum to fit
    ex : array, optional
        x values at which to evaluate the fitted line.  Defaults to
        ``nx``, preserving the original behaviour.  Callers (e.g.
        continuum_correct) pass a longer axis here so the continuum
        covers the full segment, not just the node interval.

    Returns
    =======
    c : array
        The fitted line ``m * ex + b`` (the continuum estimate)
    """
    # Only the slope and intercept are used; discard r/p/stderr.
    m, b, _, _, _ = ss.linregress(nx, ny)
    if ex is None:
        ex = nx
    return m * np.asarray(ex) + b
def fit(self, init_data, data):
    """Import data to the SPOT object.

    Parameters
    ----------
    init_data : list, numpy.array, pandas.Series, int or float
        Initial batch to calibrate the algorithm.  An int takes the
        first ``init_data`` points of ``data``; a float in (0, 1)
        takes that fraction of ``data``.
    data : list, numpy.array or pandas.Series
        Data for the run.
    """
    # Normalise the run data to a numpy array.
    if isinstance(data, list):
        self.data = np.array(data)
    elif isinstance(data, np.ndarray):
        self.data = data
    elif isinstance(data, pd.Series):
        self.data = data.values
    else:
        print('This data format (%s) is not supported' % type(data))
        return

    if isinstance(init_data, list):
        self.init_data = np.array(init_data)
    elif isinstance(init_data, np.ndarray):
        self.init_data = init_data
    elif isinstance(init_data, pd.Series):
        self.init_data = init_data.values
    elif isinstance(init_data, int):
        # Use the first `init_data` points of the run for calibration.
        self.init_data = self.data[:init_data]
        self.data = self.data[init_data:]
    # `and` (not `&`): short-circuit so non-float init_data never hits
    # the comparisons, which would raise TypeError for e.g. strings.
    elif isinstance(init_data, float) and 0 < init_data < 1:
        # Fraction of the run used for calibration.  Use self.data
        # (already a numpy array) — `data` itself may be a plain list,
        # which has no .size attribute.
        r = int(init_data * self.data.size)
        self.init_data = self.data[:r]
        self.data = self.data[r:]
    else:
        print('The initial data cannot be set')
        return
def fit(self, init_data, data):
    """Import data to the biSPOT object.

    Parameters
    ----------
    init_data : list, numpy.array, pandas.Series, int or float
        Initial batch to calibrate the algorithm.  An int takes the
        first ``init_data`` points of ``data``; a float in (0, 1)
        takes that fraction of ``data``.
    data : list, numpy.array or pandas.Series
        Data for the run.
    """
    # Normalise the run data to a numpy array.
    if isinstance(data, list):
        self.data = np.array(data)
    elif isinstance(data, np.ndarray):
        self.data = data
    elif isinstance(data, pd.Series):
        self.data = data.values
    else:
        print('This data format (%s) is not supported' % type(data))
        return

    if isinstance(init_data, list):
        self.init_data = np.array(init_data)
    elif isinstance(init_data, np.ndarray):
        self.init_data = init_data
    elif isinstance(init_data, pd.Series):
        self.init_data = init_data.values
    elif isinstance(init_data, int):
        # Use the first `init_data` points of the run for calibration.
        self.init_data = self.data[:init_data]
        self.data = self.data[init_data:]
    # `and` (not `&`): short-circuit so non-float init_data never hits
    # the comparisons, which would raise TypeError for e.g. strings.
    elif isinstance(init_data, float) and 0 < init_data < 1:
        # Fraction of the run used for calibration.  Use self.data
        # (already a numpy array) — `data` itself may be a plain list,
        # which has no .size attribute.
        r = int(init_data * self.data.size)
        self.init_data = self.data[:r]
        self.data = self.data[r:]
    else:
        print('The initial data cannot be set')
        return
def fit(self, init_data, data):
    """Import data to the DSPOT object.

    Parameters
    ----------
    init_data : list, numpy.array, pandas.Series, int or float
        Initial batch to calibrate the algorithm.  An int takes the
        first ``init_data`` points of ``data``; a float in (0, 1)
        takes that fraction of ``data``.
    data : list, numpy.array or pandas.Series
        Data for the run.
    """
    # Normalise the run data to a numpy array.
    if isinstance(data, list):
        self.data = np.array(data)
    elif isinstance(data, np.ndarray):
        self.data = data
    elif isinstance(data, pd.Series):
        self.data = data.values
    else:
        print('This data format (%s) is not supported' % type(data))
        return

    if isinstance(init_data, list):
        self.init_data = np.array(init_data)
    elif isinstance(init_data, np.ndarray):
        self.init_data = init_data
    elif isinstance(init_data, pd.Series):
        self.init_data = init_data.values
    elif isinstance(init_data, int):
        # Use the first `init_data` points of the run for calibration.
        self.init_data = self.data[:init_data]
        self.data = self.data[init_data:]
    # `and` (not `&`): short-circuit so non-float init_data never hits
    # the comparisons, which would raise TypeError for e.g. strings.
    elif isinstance(init_data, float) and 0 < init_data < 1:
        # Fraction of the run used for calibration.  Use self.data
        # (already a numpy array) — `data` itself may be a plain list,
        # which has no .size attribute.
        r = int(init_data * self.data.size)
        self.init_data = self.data[:r]
        self.data = self.data[r:]
    else:
        print('The initial data cannot be set')
        return
def fit(self, init_data, data):
    """Import data to the biDSPOT object.

    Parameters
    ----------
    init_data : list, numpy.array, pandas.Series, int or float
        Initial batch to calibrate the algorithm.  An int takes the
        first ``init_data`` points of ``data``; a float in (0, 1)
        takes that fraction of ``data``.
    data : list, numpy.array or pandas.Series
        Data for the run.
    """
    # Normalise the run data to a numpy array.
    if isinstance(data, list):
        self.data = np.array(data)
    elif isinstance(data, np.ndarray):
        self.data = data
    elif isinstance(data, pd.Series):
        self.data = data.values
    else:
        print('This data format (%s) is not supported' % type(data))
        return

    if isinstance(init_data, list):
        self.init_data = np.array(init_data)
    elif isinstance(init_data, np.ndarray):
        self.init_data = init_data
    elif isinstance(init_data, pd.Series):
        self.init_data = init_data.values
    elif isinstance(init_data, int):
        # Use the first `init_data` points of the run for calibration.
        self.init_data = self.data[:init_data]
        self.data = self.data[init_data:]
    # `and` (not `&`): short-circuit so non-float init_data never hits
    # the comparisons, which would raise TypeError for e.g. strings.
    elif isinstance(init_data, float) and 0 < init_data < 1:
        # Fraction of the run used for calibration.  Use self.data
        # (already a numpy array) — `data` itself may be a plain list,
        # which has no .size attribute.
        r = int(init_data * self.data.size)
        self.init_data = self.data[:r]
        self.data = self.data[r:]
    else:
        print('The initial data cannot be set')
        return
def train(self, x):
    """Train the scale on a column of data.

    Parameters
    ----------
    x : pd.Series | np.array
        A column of data to train over.

    Raises
    ------
    NotImplementedError
        Always — concrete scales must override this method.
    """
    raise NotImplementedError('Not Implemented')
def transform(self, x):
    """Transform array|series x.

    Raises
    ------
    NotImplementedError
        Always — concrete scales must override this method.
    """
    raise NotImplementedError('Not Implemented')
def inverse(self, x):
    """Inverse transform array|series x.

    Raises
    ------
    NotImplementedError
        Always — concrete scales must override this method.
    """
    raise NotImplementedError('Not Implemented')
def train(self, x, drop=None):
    """Train the discrete scale on a column of data.

    Parameters
    ----------
    x : pd.Series | np.array
        A column of data to train over.  The discrete range is
        stored in a list.
    drop : optional
        Values to exclude, forwarded to the range trainer.
    """
    # An empty column carries no information — skip training.
    # (len-based check rather than truthiness: numpy arrays raise
    # on ambiguous bool conversion.)
    if len(x) == 0:
        return
    self.range.train(x, drop)
def transform(self, x):
    """Transform array|series x.

    Discrete scales apply no transformation, so the input is
    returned unchanged.
    """
    return x
def transform(self, x):
    """Transform array|series x via the scale's transform object."""
    transform_one = self.trans.transform
    try:
        # Fast path: the underlying transform accepts vectorised input.
        return transform_one(x)
    except TypeError:
        # Scalar-only transform: apply it element-wise instead.
        return np.array([transform_one(val) for val in x])
def inverse(self, x):
    """Inverse transform array|series x via the scale's transform object."""
    inverse_one = self.trans.inverse
    try:
        # Fast path: the underlying inverse accepts vectorised input.
        return inverse_one(x)
    except TypeError:
        # Scalar-only inverse: apply it element-wise instead.
        return np.array([inverse_one(val) for val in x])
def image_identification_datasetup(df1, df2, sample=30000):
    """Combine two feature-engineered dataframes into one CNN dataset.

    Randomly draws ``sample`` rows (without replacement) from each input
    dataframe (the outputs of "feature_engineering_CNN" in
    feature_engineering_func.py), concatenates them, drops the
    'countrycode' and 'word' columns, and builds a matching label
    series: 1 for rows drawn from df1, 0 for rows drawn from df2.

    Parameters
    ----------
    df1, df2 : pd.DataFrame
        Input dataframes.
    sample : int
        Number of rows to draw from each dataframe.

    Returns
    -------
    (pd.DataFrame, pd.Series)
        The combined feature dataframe and its label series (sharing
        the dataframe's index).
    """
    idx1 = np.random.choice(list(df1.index), sample, replace=False)
    idx2 = np.random.choice(list(df2.index), sample, replace=False)
    sampled1 = df1.loc[list(idx1)]
    sampled2 = df2.loc[list(idx2)]
    combined = pd.concat([sampled1, sampled2], axis=0)
    combined = combined.drop(['countrycode', 'word'], axis=1)
    # 1 = df1, 0 = df2
    label = pd.Series(np.array([1] * sample + [0] * sample))
    label.index = combined.index
    return combined, label
def construct_empty_hist(self, columns):
    """Create an (empty) histogram of right type

    Create a multi-dim histogram by iterating through the columns in
    reverse order and passing a single-dim hist as input to the next
    column.

    :param list columns: histogram columns
    :returns: created histogram
    :rtype: histogrammar.Count
    """
    # Innermost aggregator: a plain counter at the deepest dimension.
    hist = hg.Count()

    # create a multi-dim histogram by iterating through the columns in reverse order
    # and passing a single-dim hist as input to the next column
    for col in reversed(columns):
        # histogram type depends on the data type
        dt = np.dtype(self.var_dtype[col])

        # Processing function, e.g. only accept booleans during filling.
        # A per-column override in self.quantity wins over the default
        # dtype-based quantity from hf.QUANTITY.
        f = self.quantity[col] if col in self.quantity else hf.QUANTITY[dt.type]
        if len(columns) == 1:
            # df[col] is a pd.series
            quant = lambda x, fnc=f: fnc(x)
        else:
            # df[columns] is a pd.Dataframe
            # fix column to col (bind f and col as defaults to avoid
            # late-binding of the loop variable in the lambda)
            quant = lambda x, fnc=f, clm=col: fnc(x[clm])

        # Instantiate the dtype to classify it via isinstance.
        is_number = isinstance(dt.type(), np.number)
        is_timestamp = isinstance(dt.type(), np.datetime64)

        if is_number or is_timestamp:
            # numbers and timestamps are put in a sparse binned histogram
            # bin specs: per-column override, else unit defaults per kind
            bs = self.bin_specs.get(col, self._unit_bin_specs if is_number else self._unit_timestamp_specs)
            hist = hg.SparselyBin(binWidth=bs['bin_width'], origin=bs['bin_offset'], quantity=quant, value=hist)
        else:
            # strings and booleans are treated as categories
            hist = hg.Categorize(quantity=quant, value=hist)

    # FIXME stick data types and number of dimension to histogram
    dta = [self.var_dtype[col] for col in columns]
    hist.datatype = dta[0] if len(columns) == 1 else dta
    hist.n_dim = len(columns)

    # NOTE(review): this assigns a property OBJECT to an instance
    # attribute.  Properties only trigger their getter when defined on a
    # class, so `hist.n_bins` returns the property object itself rather
    # than self.num/self.size — confirm how downstream code reads it.
    @property
    def n_bins(self):
        # hgr hists expose either `num` (binned) or `size` (categorical)
        if hasattr(self, 'num'):
            return self.num
        elif hasattr(self, 'size'):
            return self.size
        else:
            raise RuntimeError('Cannot retrieve number of bins from hgr hist')
    hist.n_bins = n_bins

    return hist
def continuum_correct(spectrum, nodes=None, method='linear'):
    """ Apply a continuum correction to a given spectrum

    Parameters
    ==========
    spectrum : pd.Series
               A pandas series or Spectrum object

    nodes : list
            A list of the nodes between which piecewise continuum
            will be fit

    method : {'linear', 'regression', 'cubic'}
             The type of regression to be fit, where 'linear' is a
             piecewise linear fit, 'regression' is an Ordinary Least
             Squares fit, and 'cubic' is a 2nd order polynomial fit.

    Returns
    =======
     : pd.Series
       The continuum corrected Spectrum

     : pd.Series
       The continuum line
    """
    # Wavelengths are the series index; intensities are the values.
    x = spectrum.index
    y = spectrum

    # Default to a single segment spanning the whole spectrum.
    if not nodes:
        nodes = [x[0], x[-1]]

    return_length = len(y)
    corrected = np.empty(return_length)
    continuum = np.empty(return_length)

    start = 0
    # Consecutive node pairs define the piecewise segments.
    nlist = list(zip(nodes, nodes[1:]))
    for i, n in enumerate(nlist):
        # Define indices into sub-series.
        # NOTE(review): label-based pd.Series slicing is inclusive of the
        # endpoint, so adjacent segments share their boundary sample; the
        # first/last segments are extended to cover the spectrum edges
        # below — confirm the node list is sorted like the index.
        ny = y[n[0]:n[1]]
        nx = ny.index

        if i == 0:
            # First segment: fit on the node interval, but evaluate the
            # continuum over everything up to the second node so the
            # leading edge of the spectrum is covered.
            stop = start + len(y[:n[1]])
            c = correction_methods[method](nx, ny, ex=y[:n[1]].index.values)
            ey = y[:n[1]]
        elif i == len(nlist) - 1:
            # Last segment: likewise extend to the end of the spectrum.
            stop = start + len(y[n[0]:])
            c = correction_methods[method](nx, ny, ex=y[n[0]:].index.values)
            ey = y[n[0]:]
        else:
            # Interior segment: fit and evaluate on the node interval only.
            stop = start + len(ny)
            c = correction_methods[method](nx, ny)
            ey = ny

        # Record the continuum and divide it out of the data.
        continuum[start:stop] = c
        corrected[start:stop] = ey / c

        start = stop

    return pd.Series(corrected, index=x), pd.Series(continuum, index=x)