The following 50 code examples, extracted from open-source Python projects, illustrate how to use pandas.DatetimeIndex().
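Before the project examples, here is a minimal, self-contained sketch of the two most common construction patterns. The dates and column names are illustrative only and are not taken from any of the projects below.

import pandas as pd

# Build a DatetimeIndex directly from date strings.
idx = pd.DatetimeIndex(['2017-01-01', '2017-01-02', '2017-01-03'])

# A recurring pattern in the examples below: parse a date column
# and use it as the index of a DataFrame.
df = pd.DataFrame({'date': ['2017-01-01', '2017-01-02'],
                   'value': [1.0, 2.0]})
df.index = pd.DatetimeIndex(df['date'])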
def minutes_for_days_in_range(self, start, end):
    """
    Get all market minutes for the days between start and end, inclusive.
    """
    start_date = self.normalize_date(start)
    end_date = self.normalize_date(end)

    all_minutes = []
    for day in self.days_in_range(start_date, end_date):
        day_minutes = self.market_minutes_for_day(day)
        all_minutes.append(day_minutes)

    # Concatenate all minutes and truncate minutes before start/after end.
    return pd.DatetimeIndex(
        np.concatenate(all_minutes),
        copy=False,
        tz='UTC',
    )
def get_early_closes(start, end):
    # TSX closed at 1:00 PM on december 24th.
    start = canonicalize_datetime(start)
    end = canonicalize_datetime(end)

    early_close_rules = []
    early_close_rules.append(quarta_cinzas)

    early_close_ruleset = rrule.rruleset()
    for rule in early_close_rules:
        early_close_ruleset.rrule(rule)
    early_closes = early_close_ruleset.between(start, end, inc=True)
    early_closes.sort()

    return pd.DatetimeIndex(early_closes)
def getData():
    filename = "300333.csv"
    # df = pd.read_csv(filename, index_col='date')
    df = pd.read_csv(filename)
    print df['date'].values
    # new_date = datetime.datetime(df['date'].values)  # does not work
    # date_list = [datetime.datetime.strptime(x, "%Y-%m-%d") for x in df['date'].values]
    pd_date = pd.DatetimeIndex(df['date'].values)
    # print date_list
    df['date'] = pd_date
    print df
    new_df = df.set_index('date')
    print "*" * 20
    print new_df.index
    print new_df.ix[0]
def configure_timeindex(self):
    """
    """
    try:
        ormclass = self._mapped['TempResolution']
        tr = self.session.query(ormclass).filter(
            ormclass.temp_id == self.temp_id).one()
    except (KeyError, NoResultFound):
        print('temp_id %s does not exist.' % self.temp_id)

    timeindex = pd.DatetimeIndex(start=tr.start_time,
                                 periods=tr.timesteps,
                                 freq=tr.resolution)

    self.timeindex = timeindex[self.start_snapshot - 1: self.end_snapshot]
def _validated_tuples_to_dataframe(self, validated_tuples):
    if validated_tuples == []:
        dts, values, estimateds = [], [], []
    else:
        dts, values, estimateds = zip(*validated_tuples)

    if self.parse_dates:
        dts = [dateutil.parser.parse(dt) for dt in dts]

    index = pd.DatetimeIndex(dts)
    if index.shape[0] > 0:
        index = index.tz_convert(pytz.UTC)

    df = pd.DataFrame(
        {"value": values, "estimated": estimateds},
        index=index,
        columns=["value", "estimated"],
    )
    df.value = df.value.astype(float)
    df.estimated = df.estimated.astype(bool)
    return df
def _create_daily_stats(self, perfs):
    # create daily and cumulative stats dataframe
    daily_perfs = []
    # TODO: the loop here could overwrite expected properties
    # of daily_perf. Could potentially raise or log a
    # warning.
    for perf in perfs:
        if 'daily_perf' in perf:
            perf['daily_perf'].update(
                perf['daily_perf'].pop('recorded_vars')
            )
            perf['daily_perf'].update(perf['cumulative_risk_metrics'])
            daily_perfs.append(perf['daily_perf'])
        else:
            self.risk_report = perf

    daily_dts = pd.DatetimeIndex(
        [p['period_close'] for p in daily_perfs], tz='UTC'
    )
    daily_stats = pd.DataFrame(daily_perfs, index=daily_dts)

    return daily_stats
def minutes_for_sessions_in_range(self,
                                  start_session_label,
                                  end_session_label):
    """
    Returns all the minutes for all the sessions from the given start
    session label to the given end session label, inclusive.

    Parameters
    ----------
    start_session_label: pd.Timestamp
        The label of the first session in the range.
    end_session_label: pd.Timestamp
        The label of the last session in the range.

    Returns
    -------
    pd.DatetimeIndex
        The minutes in the desired range.
    """
    first_minute, _ = self.open_and_close_for_session(start_session_label)
    _, last_minute = self.open_and_close_for_session(end_session_label)

    return self.minutes_in_range(first_minute, last_minute)
def _special_dates(self, calendars, ad_hoc_dates, start_date, end_date):
    """
    Union an iterable of pairs of the form (time, calendar)
    and an iterable of pairs of the form (time, [dates])

    (This is shared logic for computing special opens and special closes.)
    """
    _dates = DatetimeIndex([], tz='UTC').union_many(
        [
            holidays_at_time(calendar, start_date, end_date, time_, self.tz)
            for time_, calendar in calendars
        ] + [
            days_at_time(datetimes, time_, self.tz)
            for time_, datetimes in ad_hoc_dates
        ]
    )
    return _dates[(_dates >= start_date) & (_dates <= end_date)]
def make_equity_daily_bar_data(cls):
    days = cls.trading_calendar.sessions_in_range(
        pd.Timestamp('2006-01-03', tz='UTC'),
        pd.Timestamp('2006-01-09', tz='UTC')
    )

    return trades_by_sid_to_dfs(
        {
            0: factory.create_trade_history(
                0,
                np.arange(10.0, 10.0 + len(days), 1.0),
                [10000] * len(days),
                timedelta(days=1),
                cls.sim_params,
                cls.trading_calendar),
        },
        index=pd.DatetimeIndex(days),
    )
def build_dataframe(self):
    if not self.values.exists():
        return pd.DataFrame()

    # Am I really a programmer or just a lego assembler?
    # Pandas makes my life at least 20 times easier.
    df = pd.DataFrame.from_records(self.values, index=self.index_column)

    # make the columns and labels prettier
    if self.rename_columns:
        df = df.rename(columns=self.column_mapping)

    df.index.name = TIME_COLUMN_NAME

    try:
        df.index = df.index.tz_convert(self.user.pytz_timezone)
    except AttributeError:
        # an AttributeError means the index is just a regular Index and
        # that only dates (and not time) were passed
        df.index = pd.DatetimeIndex(df.index, tz=self.user.pytz_timezone)

    # cast it as numerics if possible, otherwise if we're dealing with strings, ignore
    df = df.apply(pd.to_numeric, errors='ignore')

    return df
def round_timestamp_to_sleep_date(timeseries):
    """
    Not my proudest function ... this isn't as efficient as it could be,
    but struggling with some pandas syntax to find the perfect pandas one-line

    This can be much more performant, but need time to sit down and figure it out
    """
    sleep_dates = []
    for value in timeseries:
        if value.hour < SLEEP_CUTOFF_TIME:
            result = value - pd.DateOffset(days=1)
        else:
            result = value

        sleep_dates.append(result)

    index = pd.DatetimeIndex(sleep_dates)
    return index
def resample_missing_values(df, date, period):
    df.set_index('date', inplace=True)
    # For duplicate values for same coordinates, the maximum value is chosen rather than average.
    df = (df.groupby(['lat', 'lon', 'countries'])).resample('D').max()
    df.reset_index(['lat', 'lon', 'countries'], drop=True, inplace=True)
    df['count'].fillna(0, inplace=True)
    df.groupby(['lat', 'lon', 'countries']).fillna(method='ffill', inplace=True)
    df.groupby(['lat', 'lon', 'countries']).fillna(method='bfill', inplace=True)
    df.reset_index(inplace=True)

    idx = pd.DatetimeIndex(start=date - dt.timedelta(days=(period - 1)),
                           end=date, freq='D')
    new_df = pd.DataFrame()
    for index, group in df.groupby(['lat', 'lon', 'countries']):
        group = expand_date_range(group, idx)
        new_df = pd.concat([new_df, group])

    new_df.rename(columns={'index': 'date'}, inplace=True)
    return new_df
def transform(self, df):
    temp = pd.DatetimeIndex(df.index)
    df['weekday'] = temp.weekday

    df_weekdays = df[df['weekday'] <= 4].drop('weekday', axis=1)
    weekdays = extract_days(df_weekdays)

    df_weekends = df[df['weekday'] > 4].drop('weekday', axis=1)
    weekends = extract_days(df_weekends)

    print 'weekdays: {}, weekends: {}'.format(len(weekdays), len(weekends))
    print

    part_of_week = self.environment_params['part_of_week'].values[0]

    if part_of_week == 'weekdays':
        print
        print 'Selected weekdays only'
        return df_weekdays
    elif part_of_week == 'weekends':
        print
        print 'Selected weekends only'
        return df_weekends
    else:
        print
        print 'Selected all days of week'
        return df.drop('weekday', axis=1)
def __setattr__(self, key, value):
    if key in ['data', 'keys', 'index']:
        self.__dict__[key] = value
    else:
        if type(value) == Column:
            if key in self.keys:
                self.data[self.keys.index(key)] = value.values
                self.index[self.keys.index(key), :] = value.index
            else:
                self.add_column(key, value.values, value.index)
        elif type(value) == np.ndarray:
            if key in self.keys:
                self.data[self.keys.index(key)] = value
            else:
                self.add_column(key, value)
        elif type(value) == pd.DatetimeIndex:
            if key in self.keys:
                self.data[self.keys.index(key)] = value.values
            else:
                self.add_column(key, value)
def plot(self, sort_csv_file, forecast_csv_file, save_fig_file):
    sort_df = pd.read_csv(sort_csv_file)
    sort_df['date'] = pd.to_datetime(sort_df['date'], format='%Y-%m-%d')
    sort_df = sort_df.set_index(pd.DatetimeIndex(sort_df['date']))

    forecast_df = pd.read_csv(forecast_csv_file, header=None, names=['date', 'aver'])
    forecast_df['date'] = pd.to_datetime(forecast_df['date'], format='%Y-%m-%d')
    forecast_df = forecast_df.set_index(pd.DatetimeIndex(forecast_df['date']))

    forecast_df['aver'].plot(figsize=(20, 20), c='r', linewidth=3.0)
    ax = sort_df['aver'].plot(figsize=(20, 20), linewidth=3.0)
    plt.ylabel('price')
    plt.xlabel('date')
    ax.set_ylim(sort_df['aver'].min() * 0.8, sort_df['aver'].max() * 1.2)
    plt.savefig(save_fig_file)
    plt.cla()
    plt.clf()
    plt.close()
def _hourly_range(self, init_date, time_frame):
    """
    Returns DatetimeIndex trading week/s in hours.
    """
    utcnow = datetime.utcnow()
    tr_wk_str, tr_wk_end = self.get_trading_week(init_date)
    if tr_wk_end > utcnow:
        tr_wk_end = utcnow.replace(
            minute=00, second=00, microsecond=00)

    freq, interval_type, delta = self._data_frequency(time_frame)
    dth = pd.date_range(str(tr_wk_str), str(tr_wk_end), freq=freq)

    while (len(dth) % (300 * int(time_frame[1:])) == 0) == False:
        tr_wk_str = tr_wk_end + timedelta(**{interval_type: delta})
        if tr_wk_str < utcnow:
            tr_wk_str, tr_wk_end = self.get_trading_week(tr_wk_str)
            if tr_wk_end > utcnow:
                tr_wk_end = utcnow.replace(
                    minute=00, second=00, microsecond=00)
                tr_wk_end += timedelta(hours=1)
            dth = dth.append(
                pd.date_range(str(tr_wk_str), str(tr_wk_end), freq=freq))
        else:
            break

    return dth
def _daily_range(self, daily):
    """
    Returns DatetimeIndex for daily values.
    """
    max_bars = 299
    utcnow = datetime.utcnow()
    dtd = pd.DatetimeIndex([])

    while daily < utcnow:
        tr_wk_str, tr_wk_end = self.get_trading_week(daily)
        hour = int(str(tr_wk_str.time())[:2])
        daily += timedelta(days=1)
        daily = daily.replace(hour=hour)
        if daily >= tr_wk_end:
            daily, tr_wk_end = self.get_trading_week(daily)
        dtd = dtd.append(
            pd.date_range(str(daily), str(daily)))

    return dtd
def test_to_datetime_tz_pytz(self):
    # xref 8260
    tm._skip_if_no_pytz()
    import pytz

    us_eastern = pytz.timezone('US/Eastern')
    arr = np.array([us_eastern.localize(datetime(year=2000, month=1, day=1,
                                                 hour=3, minute=0)),
                    us_eastern.localize(datetime(year=2000, month=6, day=1,
                                                 hour=3, minute=0))],
                   dtype=object)
    result = pd.to_datetime(arr, utc=True)
    expected = DatetimeIndex(['2000-01-01 08:00:00+00:00',
                              '2000-06-01 07:00:00+00:00'],
                             dtype='datetime64[ns, UTC]',
                             freq=None)
    tm.assert_index_equal(result, expected)
def test_normalize(self):
    rng = date_range('1/1/2000 9:30', periods=10, freq='D')

    result = rng.normalize()
    expected = date_range('1/1/2000', periods=10, freq='D')
    self.assertTrue(result.equals(expected))

    rng_ns = pd.DatetimeIndex(np.array([1380585623454345752,
                                        1380585612343234312])
                              .astype("datetime64[ns]"))
    rng_ns_normalized = rng_ns.normalize()
    expected = pd.DatetimeIndex(np.array([1380585600000000000,
                                          1380585600000000000])
                                .astype("datetime64[ns]"))
    self.assertTrue(rng_ns_normalized.equals(expected))

    self.assertTrue(result.is_normalized)
    self.assertFalse(rng.is_normalized)
def test_append_concat(self):
    rng = date_range('5/8/2012 1:45', periods=10, freq='5T')
    ts = Series(np.random.randn(len(rng)), rng)
    df = DataFrame(np.random.randn(len(rng), 4), index=rng)

    result = ts.append(ts)
    result_df = df.append(df)
    ex_index = DatetimeIndex(np.tile(rng.values, 2))
    self.assertTrue(result.index.equals(ex_index))
    self.assertTrue(result_df.index.equals(ex_index))

    appended = rng.append(rng)
    self.assertTrue(appended.equals(ex_index))

    appended = rng.append([rng, rng])
    ex_index = DatetimeIndex(np.tile(rng.values, 3))
    self.assertTrue(appended.equals(ex_index))

    # different index names
    rng1 = rng.copy()
    rng2 = rng.copy()
    rng1.name = 'foo'
    rng2.name = 'bar'
    self.assertEqual(rng1.append(rng1).name, 'foo')
    self.assertIsNone(rng1.append(rng2).name)
def test_sort_values(self):
    idx = DatetimeIndex(['2000-01-04', '2000-01-01', '2000-01-02'])

    ordered = idx.sort_values()
    self.assertTrue(ordered.is_monotonic)

    ordered = idx.sort_values(ascending=False)
    self.assertTrue(ordered[::-1].is_monotonic)

    ordered, dexer = idx.sort_values(return_indexer=True)
    self.assertTrue(ordered.is_monotonic)
    self.assert_numpy_array_equal(dexer, [1, 2, 0])

    ordered, dexer = idx.sort_values(return_indexer=True, ascending=False)
    self.assertTrue(ordered[::-1].is_monotonic)
    self.assert_numpy_array_equal(dexer, [0, 2, 1])
def test_take(self):
    dates = [datetime(2010, 1, 1, 14), datetime(2010, 1, 1, 15),
             datetime(2010, 1, 1, 17), datetime(2010, 1, 1, 21)]

    for tz in [None, 'US/Eastern', 'Asia/Tokyo']:
        idx = DatetimeIndex(start='2010-01-01 09:00',
                            end='2010-02-01 09:00', freq='H', tz=tz,
                            name='idx')
        expected = DatetimeIndex(dates, freq=None, name='idx', tz=tz)

        taken1 = idx.take([5, 6, 8, 12])
        taken2 = idx[[5, 6, 8, 12]]

        for taken in [taken1, taken2]:
            self.assertTrue(taken.equals(expected))
            tm.assertIsInstance(taken, DatetimeIndex)
            self.assertIsNone(taken.freq)
            self.assertEqual(taken.tz, expected.tz)
            self.assertEqual(taken.name, expected.name)
def test_dayfirst(self):
    # GH 5917
    arr = ['10/02/2014', '11/02/2014', '12/02/2014']
    expected = DatetimeIndex([datetime(2014, 2, 10), datetime(2014, 2, 11),
                              datetime(2014, 2, 12)])
    idx1 = DatetimeIndex(arr, dayfirst=True)
    idx2 = DatetimeIndex(np.array(arr), dayfirst=True)
    idx3 = to_datetime(arr, dayfirst=True)
    idx4 = to_datetime(np.array(arr), dayfirst=True)
    idx5 = DatetimeIndex(Index(arr), dayfirst=True)
    idx6 = DatetimeIndex(Series(arr), dayfirst=True)
    self.assertTrue(expected.equals(idx1))
    self.assertTrue(expected.equals(idx2))
    self.assertTrue(expected.equals(idx3))
    self.assertTrue(expected.equals(idx4))
    self.assertTrue(expected.equals(idx5))
    self.assertTrue(expected.equals(idx6))
def test_slice_year(self):
    dti = DatetimeIndex(freq='B', start=datetime(2005, 1, 1), periods=500)

    s = Series(np.arange(len(dti)), index=dti)
    result = s['2005']
    expected = s[s.index.year == 2005]
    assert_series_equal(result, expected)

    df = DataFrame(np.random.rand(len(dti), 5), index=dti)
    result = df.ix['2005']
    expected = df[df.index.year == 2005]
    assert_frame_equal(result, expected)

    rng = date_range('1/1/2000', '1/1/2010')
    result = rng.get_loc('2009')
    expected = slice(3288, 3653)
    self.assertEqual(result, expected)
def test_partial_slice(self):
    rng = DatetimeIndex(freq='D', start=datetime(2005, 1, 1), periods=500)
    s = Series(np.arange(len(rng)), index=rng)

    result = s['2005-05':'2006-02']
    expected = s['20050501':'20060228']
    assert_series_equal(result, expected)

    result = s['2005-05':]
    expected = s['20050501':]
    assert_series_equal(result, expected)

    result = s[:'2006-02']
    expected = s[:'20060228']
    assert_series_equal(result, expected)

    result = s['2005-1-1']
    self.assertEqual(result, s.iloc[0])

    self.assertRaises(Exception, s.__getitem__, '2004-12-31')
def test_timedelta(self):
    # this is valid too
    index = date_range('1/1/2000', periods=50, freq='B')
    shifted = index + timedelta(1)
    back = shifted + timedelta(-1)
    self.assertTrue(tm.equalContents(index, back))
    self.assertEqual(shifted.freq, index.freq)
    self.assertEqual(shifted.freq, back.freq)

    result = index - timedelta(1)
    expected = index + timedelta(-1)
    self.assertTrue(result.equals(expected))

    # GH4134, buggy with timedeltas
    rng = date_range('2013', '2014')
    s = Series(rng)
    result1 = rng - pd.offsets.Hour(1)
    result2 = DatetimeIndex(s - np.timedelta64(100000000))
    result3 = rng - np.timedelta64(100000000)
    result4 = DatetimeIndex(s - pd.offsets.Hour(1))
    self.assertTrue(result1.equals(result4))
    self.assertTrue(result2.equals(result3))
def to_data_frame(self):
    index = pd.DatetimeIndex([self.timestamp])
    df = pd.DataFrame(self.to_dict(), index=index, columns=self.headers())
    df.index = df['timestamp']
    df.drop('timestamp', 1, inplace=True)
    return df
def frame_to_series(self, field, frame, columns=None):
    """
    Convert a frame with a DatetimeIndex and sid columns into a series with
    a sid index, using the aggregator defined by the given field.
    """
    if isinstance(frame, pd.DataFrame):
        columns = frame.columns
        frame = frame.values

    if not len(frame):
        return pd.Series(
            data=(0 if field == 'volume' else np.nan),
            index=columns,
        ).values

    if field in ['price', 'close']:
        # shortcircuit for full last row
        vals = frame[-1]
        if np.all(~np.isnan(vals)):
            return vals
        return ffill(frame)[-1]
    elif field == 'open':
        return bfill(frame)[0]
    elif field == 'volume':
        return np.nansum(frame, axis=0)
    elif field == 'high':
        return np.nanmax(frame, axis=0)
    elif field == 'low':
        return np.nanmin(frame, axis=0)
    else:
        raise ValueError("Unknown field {}".format(field))
def fast_append_date_to_index(index, timestamp):
    """
    Append a timestamp to a DatetimeIndex.
    DatetimeIndex.append does not appear to work.
    """
    return pd.DatetimeIndex(
        np.hstack(
            [
                index.values,
                [timestamp.asm8],
            ]
        ),
        tz='UTC',
    )
def market_minute_window(self, start, count, step=1):
    """
    Return a DatetimeIndex containing `count` market minutes, starting with
    `start` and continuing `step` minutes at a time.
    """
    if not self.is_market_hours(start):
        raise ValueError("market_minute_window starting at "
                         "non-market time {minute}".format(minute=start))

    all_minutes = []

    current_day_minutes = self.market_minutes_for_day(start)
    first_minute_idx = current_day_minutes.searchsorted(start)
    minutes_in_range = current_day_minutes[first_minute_idx::step]

    # Build up list of lists of days' market minutes until we have count
    # minutes stored altogether.
    while True:

        if len(minutes_in_range) >= count:
            # Truncate off extra minutes
            minutes_in_range = minutes_in_range[:count]

        all_minutes.append(minutes_in_range)
        count -= len(minutes_in_range)
        if count <= 0:
            break

        if step > 0:
            start, _ = self.next_open_and_close(start)
            current_day_minutes = self.market_minutes_for_day(start)
        else:
            _, start = self.previous_open_and_close(start)
            current_day_minutes = self.market_minutes_for_day(start)

        minutes_in_range = current_day_minutes[::step]

    # Concatenate all the accumulated minutes.
    return pd.DatetimeIndex(
        np.concatenate(all_minutes),
        copy=False,
        tz='UTC',
    )
def get_early_closes(start, end):
    # TSX closed at 1:00 PM on december 24th.

    start = canonicalize_datetime(start)
    end = canonicalize_datetime(end)

    start = max(start, datetime(1993, 1, 1, tzinfo=pytz.utc))
    end = max(end, datetime(1993, 1, 1, tzinfo=pytz.utc))

    # Not included here are early closes prior to 1993
    # or unplanned early closes

    early_close_rules = []

    christmas_eve = rrule.rrule(
        rrule.MONTHLY,
        bymonth=12,
        bymonthday=24,
        byweekday=(rrule.MO, rrule.TU, rrule.WE, rrule.TH, rrule.FR),
        cache=True,
        dtstart=start,
        until=end
    )
    early_close_rules.append(christmas_eve)

    early_close_ruleset = rrule.rruleset()
    for rule in early_close_rules:
        early_close_ruleset.rrule(rule)
    early_closes = early_close_ruleset.between(start, end, inc=True)
    early_closes.sort()

    return pd.DatetimeIndex(early_closes)
def current_dates(self):
    where = slice(self._start_index, self._pos)
    return pd.DatetimeIndex(deepcopy(self.date_buf[where]), tz='utc')
def get_current(self):
    """
    Get a Panel that is the current data in view. It is not safe to persist
    these objects because internal data might change
    """
    where = slice(self._oldest_frame_idx(), self._pos)
    major_axis = pd.DatetimeIndex(deepcopy(self.date_buf[where]), tz='utc')
    return pd.Panel(self.buffer.values[:, where, :], self.items,
                    major_axis, self.minor_axis, dtype=self.dtype)
def current_dates(self):
    where = slice(self._oldest_frame_idx(), self._pos)
    return pd.DatetimeIndex(deepcopy(self.date_buf[where]), tz='utc')
def pipeline_event_loader_args(self, dates):
    """Construct the base object to pass to the loader.

    Parameters
    ----------
    dates : pd.DatetimeIndex
        The dates we can serve.

    Returns
    -------
    args : tuple[any]
        The arguments to forward to the loader positionally.
    """
    return dates, self.get_dataset()
def temp_pipeline_engine(calendar, sids, random_seed, symbols=None):
    """
    A contextManager that yields a SimplePipelineEngine holding a reference
    to an AssetFinder generated via tmp_asset_finder.

    Parameters
    ----------
    calendar : pd.DatetimeIndex
        Calendar to pass to the constructed PipelineEngine.
    sids : iterable[int]
        Sids to use for the temp asset finder.
    random_seed : int
        Integer used to seed instances of SeededRandomLoader.
    symbols : iterable[str], optional
        Symbols for constructed assets. Forwarded to make_simple_equity_info.
    """
    equity_info = make_simple_equity_info(
        sids=sids,
        start_date=calendar[0],
        end_date=calendar[-1],
        symbols=symbols,
    )

    loader = make_seeded_random_loader(random_seed, calendar, sids)
    get_loader = lambda column: loader

    with tmp_asset_finder(equities=equity_info) as finder:
        yield SimplePipelineEngine(get_loader, calendar, finder)
def has_data_for_dates(series_or_df, first_date, last_date):
    """
    Does `series_or_df` have data on or before first_date and on or after
    last_date?
    """
    dts = series_or_df.index
    if not isinstance(dts, pd.DatetimeIndex):
        raise TypeError("Expected a DatetimeIndex, but got %s." % type(dts))
    first, last = dts[[0, -1]]
    return (first <= first_date) and (last >= last_date)
def load_prices_from_csv(filepath, identifier_col, tz='UTC'):
    data = pd.read_csv(filepath, index_col=identifier_col)
    data.index = pd.DatetimeIndex(data.index, tz=tz)
    data.sort_index(inplace=True)
    return data
def __init__(self, first_trading_day, minute_index, market_opens,
             market_closes, ohlc_ratio):
    """
    Parameters:
    -----------
    first_trading_day : datetime-like
        UTC midnight of the first day available in the dataset.
    minute_index : pd.DatetimeIndex
        The minutes which act as an index into the corresponding values
        written into each sid's ctable.
    market_opens : pd.DatetimeIndex
        The market opens for each day in the data set. (Not yet required.)
    market_closes : pd.DatetimeIndex
        The market closes for each day in the data set. (Not yet required.)
    ohlc_ratio : int
        The factor by which the pricing data is multiplied so that the
        float data can be stored as an integer.
    """
    self.first_trading_day = first_trading_day
    self.minute_index = minute_index
    self.market_opens = market_opens
    self.market_closes = market_closes
    self.ohlc_ratio = ohlc_ratio
def overwrite_novel_deltas(baseline, deltas, dates):
    """overwrite any deltas into the baseline set that would have changed our
    most recently known value.

    Parameters
    ----------
    baseline : pd.DataFrame
        The first known values.
    deltas : pd.DataFrame
        Overwrites to the baseline data.
    dates : pd.DatetimeIndex
        The dates requested by the loader.

    Returns
    -------
    non_novel_deltas : pd.DataFrame
        The deltas that do not represent a baseline value.
    """
    get_indexes = dates.searchsorted
    novel_idx = (
        get_indexes(deltas[TS_FIELD_NAME].values, 'right') -
        get_indexes(deltas[AD_FIELD_NAME].values, 'left')
    ) <= 1
    novel_deltas = deltas.loc[novel_idx]
    non_novel_deltas = deltas.loc[~novel_idx]
    return sort_values(pd.concat(
        (baseline, novel_deltas),
        ignore_index=True,
    ), TS_FIELD_NAME), non_novel_deltas
def adjustments_from_deltas_no_sids(dense_dates,
                                    sparse_dates,
                                    column_idx,
                                    column_name,
                                    asset_idx,
                                    deltas):
    """Collect all the adjustments that occur in a dataset that does not
    have a sid column.

    Parameters
    ----------
    dense_dates : pd.DatetimeIndex
        The dates requested by the loader.
    sparse_dates : pd.DatetimeIndex
        The dates that were in the raw data.
    column_idx : int
        The index of the column in the dataset.
    column_name : str
        The name of the column to compute deltas for.
    asset_idx : pd.Series[int -> int]
        The mapping of sids to their index in the output.
    deltas : pd.DataFrame
        The overwrites that should be applied to the dataset.

    Returns
    -------
    adjustments : dict[idx -> Float64Overwrite]
        The adjustments dictionary to feed to the adjusted array.
    """
    ad_series = deltas[AD_FIELD_NAME]
    idx = 0, len(asset_idx) - 1
    return {
        dense_dates.get_loc(kd): overwrite_from_dates(
            ad_series.loc[kd],
            dense_dates,
            sparse_dates,
            idx,
            v,
        ) for kd, v in deltas[column_name].iteritems()
    }
def test_custom_query_time_tz(self):
    df = self.df.copy()
    df['timestamp'] = (
        pd.DatetimeIndex(df['timestamp'], tz='EST') +
        timedelta(hours=8, minutes=44)
    ).tz_convert('utc').tz_localize(None)
    df.ix[3:5, 'timestamp'] = pd.Timestamp('2014-01-01 13:45')
    expr = bz.data(df, name='expr', dshape=self.dshape)
    loader = BlazeLoader(data_query_time=time(8, 45), data_query_tz='EST')
    ds = from_blaze(
        expr,
        loader=loader,
        no_deltas_rule=no_deltas_rules.ignore,
        missing_values=self.missing_values,
    )
    p = Pipeline()
    p.add(ds.value.latest, 'value')
    p.add(ds.int_value.latest, 'int_value')
    dates = self.dates

    with tmp_asset_finder() as finder:
        result = SimplePipelineEngine(
            loader,
            dates,
            finder,
        ).run_pipeline(p, dates[0], dates[-1])

    expected = df.drop('asof_date', axis=1)
    expected['timestamp'] = expected['timestamp'].dt.normalize().astype(
        'datetime64[ns]',
    ).dt.tz_localize('utc')
    expected.ix[3:5, 'timestamp'] += timedelta(days=1)
    expected.set_index(['timestamp', 'sid'], inplace=True)
    expected.index = pd.MultiIndex.from_product((
        expected.index.levels[0],
        finder.retrieve_all(expected.index.levels[1]),
    ))
    assert_frame_equal(result, expected, check_dtype=False)
def test_before_trading_start(self, test_name, num_days, freq,
                              emission_rate):
    params = factory.create_simulation_parameters(
        num_days=num_days, data_frequency=freq,
        emission_rate=emission_rate)

    algo = BeforeTradingAlgorithm(sim_params=params)
    algo.run(source=[], overwrite_sim_params=False)

    self.assertEqual(algo.perf_tracker.day_count, num_days)

    self.assertTrue(params.trading_days.equals(
        pd.DatetimeIndex(algo.before_trading_at)),
        "Expected %s but was %s." % (params.trading_days,
                                     algo.before_trading_at))
def test_insert_hist_data(self):
    self._clear_db()
    init_db(self.db_info)

    # Insert two time-overlapped MarketDataBlocks
    async def run(loop, data):
        engine = await aiosa.create_engine(
            user=self.db_info['user'], db=self.db_info['db'],
            host=self.db_info['host'], password=self.db_info['password'],
            loop=loop, echo=False)
        await insert_hist_data(engine, 'Stock', data[0])
        await insert_hist_data(engine, 'Stock', data[1])
        engine.close()
        await engine.wait_closed()

    # Execute insertion
    blk0 = MarketDataBlock(testdata_insert_hist_data[0])
    blk1 = MarketDataBlock(testdata_insert_hist_data[1])
    data = [blk0, blk1]
    loop = asyncio.get_event_loop()
    loop.run_until_complete(run(loop, data))

    # Verify insertion
    df_source = testdata_insert_hist_data[2]
    engine = create_engine(self.db_conn)
    conn = engine.connect()
    metadata = MetaData(engine, reflect=True)
    table = metadata.tables['Stock']
    result = conn.execute(select([table]))
    # self.assertEqual(result.keys(), list(df_source.columns))
    df = pd.DataFrame(result.fetchall())
    df.columns = result.keys()
    _logger.debug(df.TickerTime[0])
    df.TickerTime = pd.DatetimeIndex(df.TickerTime).tz_localize('UTC')
    df_source.TickerTime = df_source.TickerTime.apply(pd.Timestamp)
    _logger.debug(df.iloc[0])
    assert_frame_equal(df, df_source)
def setUp(self):
    ind = pd.DatetimeIndex(freq='12h', start='2015-01-01',
                           end='2015-01-02 23:59')
    self.insol = pd.Series(data=[500, 1000, 500, 1000], index=ind)
    self.energy = pd.Series(data=[1.0, 4, 1.0, 4], index=ind)
    self.aggregated = aggregation_insol(self.energy, self.insol,
                                        frequency='D')
    # Test for the expected energy weighted result
def get_series(user, freq='1M'):
    user_df = user.sort_values(by='begin_date')
    user_series = pd.Series(list(user_df['event']), index=user_df['begin_date'])
    user_series.index = pd.DatetimeIndex(user_series.index)
    user_resample = user_series.resample(freq).sum().astype('str').replace(r'0', "", regex=True)
    # time = pd.date_range(start='2010-01', end='2016-06', freq=freq)
    # user_resample = user_resample.reindex(time, fill_value="").astype('str')
    # for i in range(len(user_resample)):
    #     user_resample[i] = "".join(user_resample[i].split('0'))
    return user_resample
def get_series(self, freq='1M'):
    user_df = self.user.sort_values(by='begin_date')
    user_series = pd.Series(list(user_df['event']), index=user_df['begin_date'])
    user_series.index = pd.DatetimeIndex(user_series.index)
    user_resample = user_series.resample('1D').sum()
    time = pd.date_range(start='2010-01', end='2016-06', freq='1D')
    user_resample = user_resample.reindex(time, fill_value=0)
    user_resample = user_resample.astype('str')
    user_resample = user_resample.resample(freq).sum()
    for i in range(len(user_resample)):
        user_resample[i] = "".join(user_resample[i].split('0'))
    return user_resample
def read_co_data_rnn():
    """
    Load co price data for the RNN model from MySQL.
    """
    print("start loading data...")
    sql = const.CO_PRICE_SQL_RNN
    df = read_data_from_mysql(sql)
    # tuples = list(zip(*[range(len(df)), df.price_date]))
    # index = pd.MultiIndex.from_tuples(tuples, names=['id', 'date'])
    # df.index = index
    df.index = pd.DatetimeIndex(df.price_date)
    print("loading data finished.")
    return df
def read_co_price():
    """
    Load co price data from MySQL and index it by price date.
    """
    print("start loading data...")
    sql = const.CO_PRICE_SQL
    df = read_data_from_mysql(sql)
    df.index = pd.DatetimeIndex(df.price_date)
    print("loading data finished.")
    return df.drop(['price_date'], axis=1)