The following 16 code examples, extracted from open-source Python projects, illustrate how to use pandas.DataFrame().
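Before the extracted examples, a minimal sketch of the constructor itself: building a DataFrame from a dict of equal-length columns (the column names 'ds' and 'y' are chosen for illustration only).

import pandas as pd

# construct a DataFrame from a dict of equal-length columns
df = pd.DataFrame({'ds': pd.date_range('2017-01-01', periods=3, freq='D'),
                   'y': [1.0, 2.5, 3.2]})
print(df.shape)    # (3, 2)
print(df.dtypes)   # ds: datetime64[ns], y: float64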
def add_group_component(self, components, name, group):
    """Adds a component with given name that contains all of the components
    in group.

    Parameters
    ----------
    components: DataFrame with components.
    name: Name of new group component.
    group: List of components that form the group.

    Returns
    -------
    DataFrame with components.
    """
    new_comp = components[components['component'].isin(set(group))].copy()
    new_comp['component'] = name
    # note: DataFrame.append was removed in pandas 2.0;
    # pd.concat([components, new_comp]) is the modern equivalent
    components = components.append(new_comp)
    return components
def predictive_samples(self, df):
    """Sample from the posterior predictive distribution.

    Parameters
    ----------
    df: DataFrame with dates for predictions (column ds), and capacity
        (column cap) if logistic growth.

    Returns
    -------
    Dictionary with keys "trend", "seasonal", and "yhat" containing
    posterior predictive samples for that component. "seasonal" is the sum
    of seasonalities, holidays, and added regressors.
    """
    df = self.setup_dataframe(df.copy())
    sim_values = self.sample_posterior_predictive(df)
    return sim_values
def get_actions(start_date, end_date):
    """
    :param start_date:
    :param end_date:
    :return: actions: pd.DataFrame
    """
    dump_path = './cache/all_action_%s_%s.pkl' % (start_date, end_date)
    if os.path.exists(dump_path):
        actions = pickle.load(open(dump_path, 'rb'))  # binary mode is required for pickle
    else:
        action_1 = get_actions_1()
        action_2 = get_actions_2()
        action_3 = get_actions_3()
        actions = pd.concat([action_1, action_2, action_3])  # type: pd.DataFrame
        actions = actions[(actions.time >= start_date) & (actions.time < end_date)]
        pickle.dump(actions, open(dump_path, 'wb'))
    return actions
def get_actions(start_date, end_date):
    """
    Combine all action data within [start_date, end_date).

    :param start_date:
    :param end_date:
    :return: actions: pd.DataFrame
    """
    dump_path = './cache/all_action_%s_%s.csv' % (start_date, end_date)
    if os.path.exists(dump_path):
        # actions = pickle.load(open(dump_path))
        actions = pd.read_csv(dump_path)
    else:
        action_1 = get_actions_1()
        action_2 = get_actions_2()
        action_3 = get_actions_3()
        actions = pd.concat([action_1, action_2, action_3])  # type: pd.DataFrame
        actions = actions[(actions.time >= start_date) & (actions.time < end_date)]
        # pickle.dump(actions, open(dump_path, 'w'))
        actions.to_csv(dump_path, index=False)
        print('action combination finish...')
    return actions
def get_actions(start_date, end_date):
    """
    Get all actions within the given time window.

    :param start_date:
    :param end_date:
    :return: actions: pd.DataFrame
    """
    dump_path = './cache/all_action_%s_%s.pkl' % (start_date, end_date)
    if os.path.exists(dump_path):
        actions = pickle.load(open(dump_path, 'rb'))
    else:
        action_1 = get_actions_1()
        action_2 = get_actions_2()
        action_3 = get_actions_3()
        actions = pd.concat([action_1, action_2, action_3])
        actions = actions[(actions.time >= start_date) & (actions.time < end_date)]
        pickle.dump(actions, open(dump_path, 'wb'))
    return actions
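The three get_actions variants above share one caching pattern: build the concatenated DataFrame once, persist it, and reload it on later calls. A self-contained sketch of that pattern using pandas' own pickle helpers (the cache path, helper name, and toy frame below are illustrative, not from the original projects):

import os
import pandas as pd

def load_or_build(dump_path, parts):
    """Return the concatenated DataFrame, caching it on disk."""
    if os.path.exists(dump_path):
        return pd.read_pickle(dump_path)       # cache hit: skip the concat
    combined = pd.concat(parts, ignore_index=True)
    combined.to_pickle(dump_path)              # cache miss: persist for next time
    return combined

parts = [pd.DataFrame({'time': ['2016-02-01', '2016-02-02'], 'type': [1, 2]})]
actions = load_or_build('./all_action.pkl', parts)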
def sample_damage_state(self, Pr):
    """
    Sample the damage state using a uniform random variable

    Parameters
    -----------
    Pr : pd.DataFrame
        Probability of exceeding a damage state

    Returns
    -------
    damage_state : pd.Series
        The damage state of each element
    """
    p = pd.Series(data=np.random.uniform(size=Pr.shape[0]), index=Pr.index)

    damage_state = pd.Series(data=[None] * Pr.shape[0], index=Pr.index)
    for DS_names in Pr.columns:
        damage_state[p < Pr[DS_names]] = DS_names

    return damage_state
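Outside its class, the same vectorized sampling idea can be exercised on a toy exceedance-probability table (the element and state names here are made up for the demo):

import numpy as np
import pandas as pd

# exceedance probabilities for two elements and two damage states
Pr = pd.DataFrame({'minor': [0.9, 0.2], 'major': [0.4, 0.05]},
                  index=['pipe1', 'pipe2'])

p = pd.Series(np.random.uniform(size=Pr.shape[0]), index=Pr.index)
damage_state = pd.Series([None] * Pr.shape[0], index=Pr.index)
for ds in Pr.columns:
    # later (more severe) columns overwrite earlier ones where the draw still exceeds
    damage_state[p < Pr[ds]] = ds
print(damage_state)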
def get_stress(self, p=None, tindex=None):
    """Returns the stress or stresses of the time series object as a pandas
    DataFrame.

    If the time series object has multiple stresses each column
    represents a stress.

    Returns
    -------
    stress: pd.DataFrame
        Pandas dataframe of the stress(es)
    """
    if tindex is not None:
        return self.stress[tindex]
    else:
        return self.stress
def predict(self, df=None):
    """Predict using the prophet model.

    Parameters
    ----------
    df: pd.DataFrame with dates for predictions (column ds), and capacity
        (column cap) if logistic growth. If not provided, predictions are
        made on the history.

    Returns
    -------
    A pd.DataFrame with the forecast components.
    """
    if df is None:
        df = self.history.copy()
    else:
        if df.shape[0] == 0:
            raise ValueError('Dataframe has no rows.')
        df = self.setup_dataframe(df.copy())

    df['trend'] = self.predict_trend(df)
    seasonal_components = self.predict_seasonal_components(df)
    intervals = self.predict_uncertainty(df)

    # Drop columns except ds, cap, floor, and trend
    cols = ['ds', 'trend']
    if 'cap' in df:
        cols.append('cap')
    if self.logistic_floor:
        cols.append('floor')
    # Add in forecast components
    df2 = pd.concat((df[cols], intervals, seasonal_components), axis=1)
    df2['yhat'] = df2['trend'] + df2['seasonal']
    return df2
def sample_model(self, df, seasonal_features, iteration):
    """Simulate observations from the extrapolated generative model.

    Parameters
    ----------
    df: Prediction dataframe.
    seasonal_features: pd.DataFrame of seasonal features.
    iteration: Int sampling iteration to use parameters from.

    Returns
    -------
    DataFrame with trend, seasonality, and yhat, each like df['t'].
    """
    trend = self.sample_predictive_trend(df, iteration)

    beta = self.params['beta'][iteration]
    # note: DataFrame.as_matrix() was removed in pandas 1.0; .values is the
    # modern equivalent
    seasonal = np.matmul(seasonal_features.as_matrix(), beta) * self.y_scale

    sigma = self.params['sigma_obs'][iteration]
    noise = np.random.normal(0, sigma, df.shape[0]) * self.y_scale

    return pd.DataFrame({
        'yhat': trend + seasonal + noise,
        'trend': trend,
        'seasonal': seasonal,
    })
def make_future_dataframe(self, periods, freq='D', include_history=True):
    """Create a dataframe that extends into the future a specified number
    of periods from the end of the history.

    Parameters
    ----------
    periods: Int number of periods to forecast forward.
    freq: Any valid frequency for pd.date_range, such as 'D' or 'M'.
    include_history: Boolean to include the historical dates in the data
        frame for predictions.

    Returns
    -------
    pd.DataFrame that extends forward from the end of self.history for the
    requested number of periods.
    """
    last_date = self.history_dates.max()
    dates = pd.date_range(
        start=last_date,
        periods=periods + 1,  # An extra in case we include start
        freq=freq)
    dates = dates[dates > last_date]  # Drop start if equals last_date
    dates = dates[:periods]  # Return correct number of periods

    if include_history:
        dates = np.concatenate((np.array(self.history_dates), dates))

    return pd.DataFrame({'ds': dates})
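In user code, make_future_dataframe and the predict method shown earlier combine as follows. A minimal sketch assuming the fbprophet package and a history CSV with the ds/y columns Prophet requires (the file name is illustrative):

import pandas as pd
from fbprophet import Prophet

df = pd.read_csv('history.csv')        # must contain 'ds' (dates) and 'y' columns
m = Prophet()
m.fit(df)

future = m.make_future_dataframe(periods=30, freq='D')
forecast = m.predict(future)           # returns a DataFrame of forecast components
print(forecast[['ds', 'yhat', 'yhat_lower', 'yhat_upper']].tail())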
def get_actions(start_time, end_time):
    """
    :param start_time:
    :param end_time:
    :return: actions: pd.DataFrame
    """
    FilePath = "../JData/"
    ActionAllFile = "JData_Action_All.csv"
    # ActionAllFile = "JData_Action_before_327.csv"
    action_all = pd.read_csv(FilePath + ActionAllFile, nrows=100000)
    action_all.time = pd.to_datetime(action_all['time'], format='%Y-%m-%d %H:%M:%S')
    actions = action_all[(action_all.time >= start_time) & (action_all.time <= end_time)]
    return actions
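The conversion-then-mask step in that example is worth isolating: pd.to_datetime turns the string column into datetime64, after which comparison operators produce a boolean mask. A toy illustration:

import pandas as pd

action_all = pd.DataFrame({'time': ['2016-04-01 08:00:00', '2016-04-02 09:30:00',
                                    '2016-04-05 10:00:00']})
action_all['time'] = pd.to_datetime(action_all['time'], format='%Y-%m-%d %H:%M:%S')

# datetime64 columns compare correctly against date strings as well
mask = (action_all.time >= '2016-04-01') & (action_all.time <= '2016-04-03')
print(action_all[mask])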
def cdf_probability(self, x):
    """
    Return the CDF probability for each state, based on the value of x

    Parameters
    -----------
    x : pd.Series
        Control variable for each element

    Returns
    --------
    Pr : pd.DataFrame
        Probability of exceeding a damage state
    """
    state_names = [name for name, state in self.states()]
    Pr = pd.DataFrame(index=x.index, columns=state_names)

    for element in Pr.index:
        for state_name, state in self.states():
            try:
                dist = state.distribution[element]
            except KeyError:  # fall back to the shared default distribution
                dist = state.distribution['Default']
            Pr.loc[element, state_name] = dist.cdf(x[element])

    return Pr
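A standalone version of the same CDF-table idea, with scipy.stats distributions standing in for state.distribution (the state names and lognormal parameters are invented for the demo):

import pandas as pd
from scipy.stats import lognorm

x = pd.Series({'pipe1': 0.3, 'pipe2': 0.8})          # control variable per element
states = {'minor': lognorm(0.5, scale=0.4),
          'major': lognorm(0.5, scale=1.0)}

Pr = pd.DataFrame(index=x.index, columns=list(states))
for name, dist in states.items():
    # one shared distribution per state; cdf gives P(exceeding that state)
    Pr[name] = dist.cdf(x.values)
print(Pr)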
def select_as_dataframe(
        self, table_name, column_list=None, where=None, extra=None):
    """
    Get data in the database and return fetched data as a
    :py:class:`pandas.DataFrame` instance.

    :param str table_name: |arg_select_table_name|
    :param list column_list: |arg_select_as_xx_column_list|
    :param str where: |arg_select_where|
    :param str extra: |arg_select_extra|
    :return: Table data as a :py:class:`pandas.DataFrame` instance.
    :rtype: pandas.DataFrame
    :raises simplesqlite.NullDatabaseConnectionError:
        |raises_check_connection|
    :raises simplesqlite.TableNotFoundError:
        |raises_verify_table_existence|
    :raises simplesqlite.OperationalError: |raises_operational_error|

    :Example:
        :ref:`example-select-as-dataframe`

    .. note::
        ``pandas`` package required to execute this method.
    """
    import pandas

    if column_list is None:
        column_list = self.get_attr_name_list(table_name)

    result = self.select(
        select=",".join(SqlQuery.to_attr_str_list(column_list)),
        table_name=table_name, where=where, extra=extra)
    if result is None:
        return pandas.DataFrame()

    return pandas.DataFrame(result.fetchall(), columns=column_list)
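A usage sketch for this method, assuming the simplesqlite package (the file and table names are illustrative; create_table_from_data_matrix follows the library's documented round-trip pattern):

from simplesqlite import SimpleSQLite

con = SimpleSQLite('sample.sqlite', 'w')
con.create_table_from_data_matrix(
    'sample_table',
    ['a', 'b'],                        # attribute (column) names
    [[1, 1.1], [2, 2.2], [3, 3.3]])    # rows

df = con.select_as_dataframe('sample_table')   # back out as a pandas.DataFrame
print(df)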
def construct_empty_hist(self, columns):
    """Create an (empty) histogram of right type

    Create a multi-dim histogram by iterating through the columns in
    reverse order and passing a single-dim hist as input to the next column.

    :param list columns: histogram columns
    :returns: created histogram
    :rtype: histogrammar.Count
    """
    hist = hg.Count()

    # create a multi-dim histogram by iterating through the columns in reverse order
    # and passing a single-dim hist as input to the next column
    for col in reversed(columns):
        # histogram type depends on the data type
        dt = np.dtype(self.var_dtype[col])

        # processing function, e.g. only accept booleans during filling
        f = self.quantity[col] if col in self.quantity else hf.QUANTITY[dt.type]
        if len(columns) == 1:
            # df[col] is a pd.Series
            quant = lambda x, fnc=f: fnc(x)
        else:
            # df[columns] is a pd.DataFrame
            # fix column to col
            quant = lambda x, fnc=f, clm=col: fnc(x[clm])

        is_number = isinstance(dt.type(), np.number)
        is_timestamp = isinstance(dt.type(), np.datetime64)

        if is_number or is_timestamp:
            # numbers and timestamps are put in a sparse binned histogram
            bs = self.bin_specs.get(col, self._unit_bin_specs if is_number
                                    else self._unit_timestamp_specs)
            hist = hg.SparselyBin(binWidth=bs['bin_width'], origin=bs['bin_offset'],
                                  quantity=quant, value=hist)
        else:
            # strings and booleans are treated as categories
            hist = hg.Categorize(quantity=quant, value=hist)

    # FIXME stick data types and number of dimension to histogram
    dta = [self.var_dtype[col] for col in columns]
    hist.datatype = dta[0] if len(columns) == 1 else dta
    hist.n_dim = len(columns)

    @property
    def n_bins(self):
        if hasattr(self, 'num'):
            return self.num
        elif hasattr(self, 'size'):
            return self.size
        else:
            raise RuntimeError('Cannot retrieve number of bins from hgr hist')

    hist.n_bins = n_bins

    return hist
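The histogrammar primitives used above can also be filled directly. A minimal sketch assuming the histogrammar package, with the SparselyBin parameters mirroring the call in the snippet:

import histogrammar as hg

# sparse 1-d histogram of unit-width bins, each bin counting its entries
hist = hg.SparselyBin(binWidth=1.0, origin=0.0,
                      quantity=lambda x: x, value=hg.Count())
for v in [0.2, 0.7, 1.5, 3.1, 3.9]:
    hist.fill(v)
print(hist.bins)   # sparse map of bin index -> Count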
def make_all_seasonality_features(self, df):
    """DataFrame with seasonality features.

    Includes seasonality features, holiday features, and added regressors.

    Parameters
    ----------
    df: pd.DataFrame with dates for computing seasonality features and any
        added regressors.

    Returns
    -------
    pd.DataFrame with regression features.
    list of prior scales for each column of the features dataframe.
    """
    seasonal_features = []
    prior_scales = []

    # Seasonality features
    for name, props in self.seasonalities.items():
        features = self.make_seasonality_features(
            df['ds'],
            props['period'],
            props['fourier_order'],
            name,
        )
        seasonal_features.append(features)
        prior_scales.extend(
            [props['prior_scale']] * features.shape[1])

    # Holiday features
    if self.holidays is not None:
        features, holiday_priors = self.make_holiday_features(df['ds'])
        seasonal_features.append(features)
        prior_scales.extend(holiday_priors)

    # Additional regressors
    for name, props in self.extra_regressors.items():
        seasonal_features.append(pd.DataFrame(df[name]))
        prior_scales.append(props['prior_scale'])

    if len(seasonal_features) == 0:
        seasonal_features.append(
            pd.DataFrame({'zeros': np.zeros(df.shape[0])}))
        prior_scales.append(1.)

    return pd.concat(seasonal_features, axis=1), prior_scales
def predict_seasonal_components(self, df):
    """Predict seasonality components, holidays, and added regressors.

    Parameters
    ----------
    df: Prediction dataframe.

    Returns
    -------
    DataFrame with seasonal components.
    """
    seasonal_features, _ = self.make_all_seasonality_features(df)
    lower_p = 100 * (1.0 - self.interval_width) / 2
    upper_p = 100 * (1.0 + self.interval_width) / 2

    components = pd.DataFrame({
        'col': np.arange(seasonal_features.shape[1]),
        'component': [x.split('_delim_')[0] for x in seasonal_features.columns],
    })
    # Add total for all regression components
    components = components.append(pd.DataFrame({
        'col': np.arange(seasonal_features.shape[1]),
        'component': 'seasonal',
    }))
    # Add totals for seasonality, holiday, and extra regressors
    components = self.add_group_component(
        components, 'seasonalities', self.seasonalities.keys())
    if self.holidays is not None:
        components = self.add_group_component(
            components, 'holidays', self.holidays['holiday'].unique())
    components = self.add_group_component(
        components, 'extra_regressors', self.extra_regressors.keys())
    # Remove the placeholder
    components = components[components['component'] != 'zeros']

    X = seasonal_features.as_matrix()
    data = {}
    for component, features in components.groupby('component'):
        cols = features.col.tolist()
        comp_beta = self.params['beta'][:, cols]
        comp_features = X[:, cols]
        comp = (
            np.matmul(comp_features, comp_beta.transpose())
            * self.y_scale  # noqa W503
        )
        data[component] = np.nanmean(comp, axis=1)
        data[component + '_lower'] = np.nanpercentile(comp, lower_p, axis=1)
        data[component + '_upper'] = np.nanpercentile(comp, upper_p, axis=1)

    return pd.DataFrame(data)