The following 50 code examples, extracted from open-source Python projects, illustrate how to use pandas.Timedelta().
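Before the project examples, here is a minimal, self-contained sketch (not taken from any of the projects below) of the most common ways a pandas.Timedelta is constructed and combined with a Timestamp; the variable names are illustrative only.

import pandas as pd

# A Timedelta can be built from a string, from keyword arguments, or from a value plus unit.
delta_str = pd.Timedelta('1 days 02:00:00')
delta_kwargs = pd.Timedelta(days=1, hours=2)
delta_unit = pd.Timedelta(26, unit='h')
assert delta_str == delta_kwargs == delta_unit

# Timedeltas shift Timestamps and divide cleanly by other Timedeltas.
ts = pd.Timestamp('2016-10-01')
print(ts + delta_str)                      # 2016-10-02 02:00:00
print(delta_str / pd.Timedelta('1 hour'))  # 26.0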
def offset(self, time_delta):
    """ Get the future chain for this root symbol with a given
    offset from the current as_of_date.

    Parameters
    ----------
    time_delta : datetime.timedelta or pandas.Timedelta or str
        The offset from the current as_of_date for the new chain.

    Returns
    -------
    FutureChain
    """
    return self.as_of(self.as_of_date + Timedelta(time_delta))


# http://www.cmegroup.com/product-codes-listing/month-codes.html
def predict_tf_once(day, start_date='2016-10-1'):
    all_dataset = get_dataset(day)
    all_dataset = map(lambda x: x.ix[start_date:start_date], all_dataset)
    y_p_features = map(lambda user_id: tf_percent_model.resample_x_y_(all_dataset, user_id)[0].reshape(-1),
                       get_full_user_ids())
    y_p_features_df = pd.DataFrame(y_p_features, index=get_full_user_ids())
    percent = pd.DataFrame.from_csv('./features/tensorflow_model/percent_model/%d.csv' % day)
    #percent = pd.DataFrame.from_csv('./features/tensorflow_model/percent_model/%d.csv'%2)
    #%%
    percent = percent[map(lambda x: 'percent#%d' % x, range(_feature_length))]
    t = pd.DataFrame(index=percent.index)
    t[pd.Timestamp(start_date) + pd.Timedelta('%dd' % (day - 1))] = (np.array(y_p_features_df) * percent).sum(axis=1)
    t = t.T
    t.to_csv('./result/predict_part/%d.csv' % day)
    real = int(np.round((np.array(y_p_features_df) * percent).sum().sum()))
    print(day, real)
    return (day, real)
def make_month_features(holiday_df):
    df_list = []
    for cols in ['holiday', 'festday']:
        new_df = pd.DataFrame(index=holiday_df.index)
        holi = holiday_df[cols].copy()
        holi_new = holi.copy()
        # predict 30 days and 30 days for features
        for d in range(30):
            holi_new.index += pd.Timedelta('1D')
            new_df['%s#-%d' % (cols, d + 1)] = holi_new
        # create 31 models
        for d in range(31 + 3):  # predict 31 days + 3 days
            new_df['%s#%d' % (cols, d)] = holi
            holi.index -= pd.Timedelta('1D')
        new_df = new_df[map(lambda day: '%s#%d' % (cols, day), range(-30, 30 + 3))]
        new_df = new_df.ix['2015-1-1':'2016-12-31']
        df_list.append(new_df.dropna())
    return df_list
def make_features(user_id, user_df):
    """Build 30-day lag feature matrices of daily power consumption for one user."""
    print('user_id:', user_id)
    power = user_df.power_consumption
    assert power.index[0] == user_df.index[0]
    assert len(user_df.index) == 639
    new_df = pd.DataFrame(index=user_df.index.union(pd.date_range('2016-9-1', '2016-9-30')))
    pw_new = power.copy()
    # predict 30 days and 30 days for features
    for d in range(60):
        pw_new.index += pd.Timedelta('1D')
        new_df['power#-%d' % (d + 1)] = pw_new
    # create 30 models
    for d in range(30):
        # 30 days features
        x_ = new_df[new_df.columns[d:30 + d]]
        x_['y'] = power
        x_.to_csv('./features/day_model/%d/%d.csv' % (d + 1, user_id))
    # return x_
def make_month_features(user_id, user_df):
    """Build month-ahead lag features and targets of power consumption for one user."""
    print('user_id:', user_id)
    power = user_df.power_consumption.copy()
    assert power.index[0] == user_df.index[0]
    new_df = pd.DataFrame(index=user_df.index.union(pd.date_range('2016-10-1', '2016-10-31')))
    pw_new = power.copy()
    # predict 30 days and 30 days for features
    for d in range(30):
        pw_new.index += pd.Timedelta('1D')
        new_df['power#-%d' % (d + 1)] = pw_new
    # create 30 models
    for d in range(31):
        # 30 days features
        new_df['y#%d' % d] = power
        power.index -= pd.Timedelta('1D')
    save_month_df(new_df, user_id)
    return new_df
def make_history_month_features(user_id, user_df):
    """Build history-based monthly feature columns for one user."""
    print('user_id:', user_id)
    power = user_df.power_consumption.copy()
    feature_df = history_feature(power)
    new_df = pd.DataFrame(index=feature_df.index)
    # create 30 models
    for d in range(30):
        for cols in feature_df:
            # 30 days features
            new_df[cols + '#%d' % d] = feature_df[cols]
        feature_df.index -= pd.Timedelta('1D')
    new_df = new_df.dropna()
    save_history_df(new_df.dropna(), user_id)
    return new_df
def train(obj, user, replays, age):
    """Manually train the model for a given user.
    """
    import os
    import pickle

    import pandas as pd

    from slider import Library, Client
    from slider.model import train_from_replay_directory

    if age is not None:
        age = pd.Timedelta(age)

    m = train_from_replay_directory(
        replays,
        client=Client(Library(obj.maps), obj.api_key),
        age=age,
    )
    with open(os.path.join(obj.models, user), 'wb') as f:
        pickle.dump(m, f)
def gen_token(token_secret, user):
    """Generate a token for a user.

    Parameters
    ----------
    token_secret : secret
        The secret to encrypt with.
    user : str
        The user to make a token for.

    Returns
    -------
    token : str
        The encrypted token.
    """
    now = pd.Timestamp.now(tz='utc')
    return token_secret.encrypt(
        json.dumps({
            'issued': now.isoformat(),
            'expires': (now + pd.Timedelta(hours=12)).isoformat(),
            'user': user,
        }).encode('utf-8')
    ).decode('utf-8')
def should_trigger(self, dt):
    if self.date is None or dt >= self.next_date:
        # initialize or reset for new date
        self.triggered = False
        self.date = dt

        # record the timestamp for the next day, so that we can use it
        # to know if we've moved to the next day
        self.next_date = dt + pd.Timedelta(1, unit="d")

    if not self.triggered and self.rule.should_trigger(dt):
        self.triggered = True
        return True


# Factory API
def _update(stock, conn):
    try:
        print("update ----- :", stock)
        query = "select * from '%s' order by date" % stock
        df = pd.read_sql(query, conn)
        df = df.set_index('date')
        print("sql saved:", df.tail(1), df.ix[-1], df.ix[-1].name)
        if dt.now().weekday() == 5:
            today = str(pd.Timestamp(dt.now()) - pd.Timedelta(days=1))[:10]
        elif dt.now().weekday() == 6:
            today = str(pd.Timestamp(dt.now()) - pd.Timedelta(days=2))[:10]
        else:
            today = str(pd.Timestamp(dt.now()))[:10]
        print("today:", today)
        if today != df.ix[-1].name[:10]:
            df = ts.get_h_data(stock, start=df.ix[-1].name[:10], retry_count=5, pause=1)
            print("read from tu:", df.head(1))
            df[['open', 'high', 'close', 'low', 'volume']].to_sql(stock, conn, if_exists='append')
            import time
            time.sleep(10)
    except Exception as arg:
        print("exception:", stock, arg)
        errorlist.append(stock)
def generate_stock(fn, directory=None, freq=pd.Timedelta(seconds=60),
                   start=pd.Timestamp('2000-01-01'),
                   end=pd.Timestamp('2050-01-01')):
    start = pd.Timestamp(start)
    directory = directory or os.path.join('data', 'generated')
    fn2 = os.path.split(fn)[1]
    sym = fn2[len('table_'):fn2.find('.csv')]
    if not os.path.exists(directory):
        os.mkdir(directory)
    if not os.path.exists(os.path.join(directory, sym)):
        os.mkdir(os.path.join(directory, sym))

    df = load_file(fn)
    for date, rec in df.to_dict(orient='index').items():
        if start <= pd.Timestamp(date) <= end:
            df2 = generate_day(date, freq=freq, **rec)
            fn2 = os.path.join(directory, sym, str(date).replace(' ', 'T') + '.csv')
            df2.to_csv(fn2)
    print('Finished %s' % sym)
def test_add_iadd(self):
    # only test adding/sub offsets as + is now numeric

    # offset
    offsets = [pd.offsets.Hour(2), timedelta(hours=2),
               np.timedelta64(2, 'h'), Timedelta(hours=2)]

    for delta in offsets:
        rng = timedelta_range('1 days', '10 days')
        result = rng + delta
        expected = timedelta_range('1 days 02:00:00', '10 days 02:00:00',
                                   freq='D')
        tm.assert_index_equal(result, expected)
        rng += delta
        tm.assert_index_equal(rng, expected)

    # int
    rng = timedelta_range('1 days 09:00:00', freq='H', periods=10)
    result = rng + 1
    expected = timedelta_range('1 days 10:00:00', freq='H', periods=10)
    tm.assert_index_equal(result, expected)
    rng += 1
    tm.assert_index_equal(rng, expected)
def test_overflow(self):
    # GH 9442
    s = Series(pd.date_range('20130101', periods=100000, freq='H'))
    s[0] += pd.Timedelta('1s 1ms')

    # mean
    result = (s - s.min()).mean()
    expected = pd.Timedelta((pd.DatetimeIndex((s - s.min())).asi8 / len(s)
                             ).sum())

    # the computation is converted to float so might be some loss of
    # precision
    self.assertTrue(np.allclose(result.value / 1000, expected.value / 1000))

    # sum
    self.assertRaises(ValueError, lambda: (s - s.min()).sum())
    s1 = s[0:10000]
    self.assertRaises(ValueError, lambda: (s1 - s1.min()).sum())
    s2 = s[0:1000]
    result = (s2 - s2.min()).sum()
def test_to_timedelta_on_nanoseconds(self):
    # GH 9273
    result = Timedelta(nanoseconds=100)
    expected = Timedelta('100ns')
    self.assertEqual(result, expected)

    result = Timedelta(days=1, hours=1, minutes=1, weeks=1, seconds=1,
                       milliseconds=1, microseconds=1, nanoseconds=1)
    expected = Timedelta(694861001001001)
    self.assertEqual(result, expected)

    result = Timedelta(microseconds=1) + Timedelta(nanoseconds=1)
    expected = Timedelta('1us1ns')
    self.assertEqual(result, expected)

    result = Timedelta(microseconds=1) - Timedelta(nanoseconds=1)
    expected = Timedelta('999ns')
    self.assertEqual(result, expected)

    result = Timedelta(microseconds=1) + 5 * Timedelta(nanoseconds=-2)
    expected = Timedelta('990ns')
    self.assertEqual(result, expected)

    self.assertRaises(TypeError, lambda: Timedelta(nanoseconds='abc'))
def test_timedelta_hash_equality(self):
    # GH 11129
    v = Timedelta(1, 'D')
    td = timedelta(days=1)
    self.assertEqual(hash(v), hash(td))

    d = {td: 2}
    self.assertEqual(d[v], 2)

    tds = timedelta_range('1 second', periods=20)
    self.assertTrue(all(hash(td) == hash(td.to_pytimedelta()) for td in tds))

    # python timedeltas drop ns resolution
    ns_td = Timedelta(1, 'ns')
    self.assertNotEqual(hash(ns_td), hash(ns_td.to_pytimedelta()))
def test_get_indexer(self):
    idx = pd.date_range('2000-01-01', periods=3)
    tm.assert_numpy_array_equal(idx.get_indexer(idx), [0, 1, 2])

    target = idx[0] + pd.to_timedelta(['-1 hour', '12 hours', '1 day 1 hour'])
    tm.assert_numpy_array_equal(idx.get_indexer(target, 'pad'), [-1, 0, 1])
    tm.assert_numpy_array_equal(
        idx.get_indexer(target, 'backfill'), [0, 1, 2])
    tm.assert_numpy_array_equal(
        idx.get_indexer(target, 'nearest'), [0, 1, 1])
    tm.assert_numpy_array_equal(
        idx.get_indexer(target, 'nearest',
                        tolerance=pd.Timedelta('1 hour')),
        [0, -1, 1])
    with tm.assertRaises(ValueError):
        idx.get_indexer(idx[[0]], method='nearest', tolerance='foo')
def test_get_loc(self):
    idx = pd.to_timedelta(['0 days', '1 days', '2 days'])

    for method in [None, 'pad', 'backfill', 'nearest']:
        self.assertEqual(idx.get_loc(idx[1], method), 1)
        self.assertEqual(idx.get_loc(idx[1].to_pytimedelta(), method), 1)
        self.assertEqual(idx.get_loc(str(idx[1]), method), 1)

    self.assertEqual(
        idx.get_loc(idx[1], 'pad', tolerance=pd.Timedelta(0)), 1)
    self.assertEqual(
        idx.get_loc(idx[1], 'pad', tolerance=np.timedelta64(0, 's')), 1)
    self.assertEqual(idx.get_loc(idx[1], 'pad', tolerance=timedelta(0)), 1)

    with tm.assertRaisesRegexp(ValueError, 'must be convertible'):
        idx.get_loc(idx[1], method='nearest', tolerance='foo')

    for method, loc in [('pad', 1), ('backfill', 2), ('nearest', 1)]:
        self.assertEqual(idx.get_loc('1 day 1 hour', method), loc)
def test_column_dups_indexing2(self):

    # GH 8363
    # datetime ops with a non-unique index
    df = DataFrame({'A': np.arange(5, dtype='int64'),
                    'B': np.arange(1, 6, dtype='int64')},
                   index=[2, 2, 3, 3, 4])
    result = df.B - df.A
    expected = Series(1, index=[2, 2, 3, 3, 4])
    assert_series_equal(result, expected)

    df = DataFrame({'A': date_range('20130101', periods=5),
                    'B': date_range('20130101 09:00:00', periods=5)},
                   index=[2, 2, 3, 3, 4])
    result = df.B - df.A
    expected = Series(pd.Timedelta('9 hours'), index=[2, 2, 3, 3, 4])
    assert_series_equal(result, expected)
def _unpack_state(self, result: pd.DataFrame, locs: dict):
    """Put restart values in the state dataset"""
    # We concatenate with the old state values in case we don't
    # have 90 new days to use
    tmin = np.concatenate((self.state['t_min'].sel(**locs).values[:],
                           result['t_min'].values))
    tmax = np.concatenate((self.state['t_max'].sel(**locs).values[:],
                           result['t_max'].values))
    prec = np.concatenate((self.state['prec'].sel(**locs).values[:],
                           result['prec'].values))

    self.state['t_min'].sel(**locs).values[:] = tmin[-90:]
    self.state['t_max'].sel(**locs).values[:] = tmax[-90:]
    self.state['prec'].sel(**locs).values[:] = prec[-90:]
    self.state['swe'].sel(**locs).values = result['swe'].values[-1]

    state_start = result.index[-1] - pd.Timedelta('89 days')
    self.state.time.values = date_range(state_start, result.index[-1],
                                        calendar=self.params['calendar'])
def event_to_min_per_hour(df, event):
    def hourly(start, end):
        ret = [(start.floor("1h"), 60 - start.minute)]
        t = start.ceil("1h")
        while t <= end:
            ret.append((t, 60))
            t += pd.Timedelta("1h")
        ret.append((end.floor("1h"), end.minute - 60))
        return ret

    df = df[df.event.str.contains(event)]
    res = []
    for i, (start, end, _) in df.iterrows():
        res += hourly(start, end)
    df = pd.DataFrame(res)
    df.columns = ['ut_ms', event + "_mins"]
    df = df.set_index('ut_ms')
    df = df.resample("1h").sum().fillna(0.0)
    return df
def test_simulated_historical_forecasts(self):
    m = Prophet()
    m.fit(self.__df)
    k = 2
    for p in [1, 10]:
        for h in [1, 3]:
            period = '{} days'.format(p)
            horizon = '{} days'.format(h)
            df_shf = diagnostics.simulated_historical_forecasts(
                m, horizon=horizon, k=k, period=period)
            # All cutoff dates should be less than ds dates
            self.assertTrue((df_shf['cutoff'] < df_shf['ds']).all())
            # The unique size of output cutoff should be equal to 'k'
            self.assertEqual(len(np.unique(df_shf['cutoff'])), k)
            self.assertEqual(
                max(df_shf['ds'] - df_shf['cutoff']),
                pd.Timedelta(horizon),
            )
            dc = df_shf['cutoff'].diff()
            dc = dc[dc > pd.Timedelta(0)].min()
            self.assertTrue(dc >= pd.Timedelta(period))
            # Each y in df_shf and self.__df with same ds should be equal
            df_merged = pd.merge(df_shf, self.__df, 'left', on='ds')
            self.assertAlmostEqual(
                np.sum((df_merged['y_x'] - df_merged['y_y']) ** 2), 0.0)
def ensure_timestamps(timestamps, func_get_latest_time=None, if_fail='ignore'):
    t_is_not_timestamp = [t is None or isinstance(t, (str, pd.Timedelta, timedelta))
                          for t in timestamps]
    if any(t_is_not_timestamp):
        if t_is_not_timestamp[-1]:
            last_timestamp = to_datetime(timestamps[-1],
                                         from_datetime=func_get_latest_time,
                                         if_invalid='return_none')
            if last_timestamp is None:
                if if_fail == 'ignore':
                    return []
                elif if_fail == 'raise':
                    raise ValueError('Cannot convert timestamps {!r}'.format(timestamps))
                else:
                    raise NotImplementedError('Unsupported handling method when fail: {}'
                                              .format(if_fail))
            timestamps[-1] = last_timestamp
        else:
            last_timestamp = timestamps[-1]
        timestamps[:-1] = [to_datetime(t, from_datetime=last_timestamp) if is_not_timestamp else t
                           for t, is_not_timestamp in zip(timestamps[:-1], t_is_not_timestamp[:-1])]
    # if all(t == timestamps[0] for t in timestamps[1:]):
    #     timestamps = [timestamps[0]]
    return timestamps
def get_comment_product_fea(endtime):
    enddt = pd.to_datetime(endtime, format='%Y-%m-%d')
    if enddt == pd.to_datetime('2016-04-15', format='%Y-%m-%d'):
        commentdata = pd.read_csv(FilePath + CommentFile)
        commentdata = commentdata[(commentdata["dt"] == "2016-04-15")]
        commentdata = commentdata.sort_values(by="sku_id").reset_index()[
            ["sku_id", "comment_num", "has_bad_comment", "bad_comment_rate"]]
        return commentdata
    else:
        startdt = enddt - pd.Timedelta(days=7)
        commentpath = FilePath + CommentFile
        commentdata_ALL = pd.read_csv(commentpath)  # read the JData comment file
        commentdata_ALL.dt = pd.to_datetime(commentdata_ALL.dt, format='%Y-%m-%d')  # parse the dt column as dates
        comment = commentdata_ALL[(commentdata_ALL.dt <= enddt) & (commentdata_ALL.dt > startdt)]
        df = pd.get_dummies(comment['comment_num'], prefix='comment_num')
        comment = pd.concat([comment, df], axis=1)
        comment = comment[['sku_id', 'has_bad_comment', 'bad_comment_rate', 'comment_num_1',
                           'comment_num_2', 'comment_num_3', 'comment_num_4']]
        sorted_comment = comment.sort_values(by=['sku_id']).reset_index().drop('index', 1)
        # sorted_comment.to_csv(FilePath + 'skuFeaInComment_before' + str(enddt), index=False)
        return sorted_comment
def extract_days(input_delta):
    """
    Helper function to extract the number of days from a time delta.

    Returns:
        - Number of days, if valid time delta
        - np.NaN if time delta is null or invalid

    :param input_delta:
    :return: number of days in time delta
    :rtype: float
    """
    # Attempt to coerce into Pandas time delta
    delta = pd.Timedelta(input_delta)

    # Attempt to extract number of days
    days = np.NaN
    if pd.notnull(delta):
        days = delta.days

    # Return result
    return days
def test_timings_in_context(self):
    '''Test that timings_in_context gives us the right results.'''
    in_context = analyze.timings_in_context(self.samples)

    # Since each "function" has a fixed frequency, we can create
    # two series with TimedeltaIndexes and align them into the
    # same DataFrame, which should be what timings_in_context
    # gives us.
    fn1_expected = pd.Series(
        1.1,
        index=pd.TimedeltaIndex(
            freq=pd.Timedelta('1.1s'), start='1.1s', periods=20,
            name='time'))
    fn2_expected = pd.Series(
        1.5,
        index=pd.TimedeltaIndex(
            freq=pd.Timedelta('1.5s'), start='1.5s', periods=20,
            name='time'))
    expected = pd.DataFrame({
        'fn1': fn1_expected,
        'fn2': fn2_expected
    })

    pdt.assert_frame_equal(in_context, expected)
def test_timedelta_format():
    x = [timedelta(days=7 * i) for i in range(5)]
    labels = timedelta_format()(x)
    assert labels == ['0', '1 week', '2 weeks', '3 weeks', '4 weeks']

    x = [pd.Timedelta(seconds=600 * i) for i in range(5)]
    labels = timedelta_format()(x)
    assert labels == \
        ['0', '10 minutes', '20 minutes', '30 minutes', '40 minutes']

    # specific units
    labels = timedelta_format(units='h')(x)
    assert labels == \
        ['0', '0.1667 hours', '0.3333 hours', '0.5000 hours', '0.6667 hours']

    # usetex
    x = [timedelta(microseconds=7 * i) for i in range(5)]
    labels = timedelta_format(units='us', usetex=True)(x)
    assert labels == \
        ['0', '7$\\mu s$', '14$\\mu s$', '21$\\mu s$', '28$\\mu s$']
def test_create_pnl_sweep_no_event_open_pnl_only(self):
    blt = blotter.Blotter(self.prices, self.rates, base_ccy="USD",
                          sweep_time=None,
                          accrual_time=pd.Timedelta("0h"),
                          eod_time=pd.Timedelta("0h"))
    blt.connect_market_data()
    ts = pd.Timestamp('2015-08-03T12:00:00')
    pos = 1
    blt.define_generic("SXM", "CAD", 0, 1, 0)
    blt.map_instrument("SXM", "SXMZ15")
    blt.trade(ts, 'SXMZ15', pos, price=800, ntc_price=800)
    ts = pd.Timestamp('2015-08-04T00:00:00')
    blt.automatic_events(ts)
    evs = blt.create_events(ts, "PNL_SWEEP")
    evs_exp = []
    self.assertEventsEqual(evs, evs_exp)
def test_automatic_events_future_type_creation(self):
    blt = blotter.Blotter(self.prices, self.rates,
                          accrual_time=pd.Timedelta(0, unit='h'),
                          eod_time=pd.Timedelta(0, unit='h'),
                          sweep_time=pd.Timedelta(0, unit='h'))
    blt.connect_market_data()
    blt.define_generic("ES", "USD", 0.1, 100, 2.50)
    blt.map_instrument("ES", "ESZ15")
    ts = pd.Timestamp('2015-08-04T10:00:00')
    number_instr = 1
    blt._trade(ts, "ESZ15", number_instr, 2000)

    blt.automatic_events(pd.Timestamp('2015-08-05T10:00:00'))
    ev_types = []
    for ev in blt.event_log:
        ev_types.append(ev.split("|")[0])

    ev_types_exp = ["TRADE", "INTEREST", "PNL"]
    self.assertEqual(ev_types, ev_types_exp)
def test_automatic_events_closed_pnl_mark(self):
    blt = blotter.Blotter(self.prices, self.rates,
                          accrual_time=pd.Timedelta(0, unit='h'),
                          eod_time=pd.Timedelta(0, unit='h'),
                          sweep_time=pd.Timedelta(0, unit='h'))
    blt.connect_market_data()
    blt.define_generic("ES", ccy="USD", margin=0, multiplier=1,
                       commission=0, isFX=False)
    blt.map_instrument("ES", "ESZ15")
    ts = pd.Timestamp("2015-08-04T11:00:00")
    blt.trade(ts, "ESZ15", 1, 2000)
    ts = pd.Timestamp("2015-08-04T12:00:00")
    hlds = blt.get_instruments()
    for instr, qty in hlds.iteritems():
        blt.trade(ts, instr, -qty, 2001)

    ts = pd.Timestamp("2015-08-05T00:00:00")
    blt.automatic_events(ts)
    pnl_history = blt.get_pnl_history()

    usd = pd.DataFrame([[1.0, 1.0, 0.0]], index=[ts],
                       columns=["pnl", "closed pnl", "open pnl"])
    pnl_history_exp = {"USD": usd}

    self.assertDictDataFrameEqual(pnl_history, pnl_history_exp)
def analyze_frequency_for_group(df, date_col, group_cols):
    dates_and_keys = df[[date_col] + group_cols].sort_values(date_col)
    shifted = dates_and_keys.groupby(group_cols)[date_col].shift(1)
    diffs = (dates_and_keys[date_col] - shifted)
    diff_value_counts = diffs.value_counts()
    frequency = diff_value_counts.index[0]

    for diff, count in diff_value_counts.iteritems():
        if frequency == pd.Timedelta(0):
            raise ValueError('Many duplicate dates found in time series. If these dates belong to '
                             'different series, specify the key for the series in '
                             'make_time_series with the parameter series_key_col.')
        if diff % frequency != pd.Timedelta(0):
            raise ValueError('Can not determine frequency of time series. Found gap of length {}, '
                             'which is not a multiple of the assumed frequency of {}'
                             .format(diff, frequency))

    return frequency
def cv_splits(self, input):
    dates = input[self.date_split_col]
    left = dates.max()
    split_points = []
    for i in range(self.n_folds):
        right = left
        left = left - pd.Timedelta(days=self.prediction_length)
        split_points.append((left, right))
    split_points.reverse()

    if split_points[0][0] - dates.min() < pd.Timedelta(days=self.prediction_length):
        raise Exception('Training set is shorter than the prediction length. Use fewer '
                        'cross validation folds or a shorter prediction length')

    split_indices = []
    for left, right in split_points:
        train = input[dates < left]
        cv = input[(dates >= left) & (dates < right)]
        split_indices.append([train.index.values, cv.index.values])

    return split_indices
def make_commodity_future_info(first_sid, root_symbols, years, month_codes=None):
    """
    Make futures testing data that simulates the notice/expiration date
    behavior of physical commodities like oil.

    Parameters
    ----------
    first_sid : int
    root_symbols : list[str]
    years : list[int]
    month_codes : dict[str -> int]

    Expiration dates are on the 20th of the month prior to the month code.
    Notice dates are on the 20th two months prior to the month code.
    Start dates are one year before the contract month.

    See Also
    --------
    make_future_info
    """
    nineteen_days = pd.Timedelta(days=19)
    one_year = pd.Timedelta(days=365)
    return make_future_info(
        first_sid=first_sid,
        root_symbols=root_symbols,
        years=years,
        notice_date_func=lambda dt: dt - MonthBegin(2) + nineteen_days,
        expiration_date_func=lambda dt: dt - MonthBegin(1) + nineteen_days,
        start_date_func=lambda dt: dt - one_year,
        month_codes=month_codes,
    )
def test_offset(self):
    """ Test the offset method of FutureChain.
    """
    cl = FutureChain(self.asset_finder, lambda: '2005-12-01', 'CL')

    # Test that an offset forward sets as_of_date as expected
    self.assertEqual(
        cl.offset('3 days').as_of_date,
        cl.as_of_date + pd.Timedelta(days=3)
    )

    # Test that an offset backward sets as_of_date as expected, with
    # time delta given as str, datetime.timedelta, and pd.Timedelta.
    self.assertEqual(
        cl.offset('-1000 days').as_of_date,
        cl.as_of_date + pd.Timedelta(days=-1000)
    )
    self.assertEqual(
        cl.offset(timedelta(days=-1000)).as_of_date,
        cl.as_of_date + pd.Timedelta(days=-1000)
    )
    self.assertEqual(
        cl.offset(pd.Timedelta('-1000 days')).as_of_date,
        cl.as_of_date + pd.Timedelta(days=-1000)
    )

    # An offset of zero should give the original chain.
    self.assertEqual(cl[0], cl.offset(0)[0])
    self.assertEqual(cl[0], cl.offset("0 days")[0])

    # A string that doesn't represent a time delta should raise a
    # ValueError.
    with self.assertRaises(ValueError):
        cl.offset("blah")
def test_cached_object(self):
    expiry = Timestamp('2014')
    before = expiry - Timedelta('1 minute')
    after = expiry + Timedelta('1 minute')

    obj = CachedObject(1, expiry)

    self.assertEqual(obj.unwrap(before), 1)
    self.assertEqual(obj.unwrap(expiry), 1)  # Unwrap on expiry is allowed.
    with self.assertRaises(Expired) as e:
        obj.unwrap(after)
    self.assertEqual(e.exception.args, (expiry,))
def compute_commit_periods(self, ticket_frame: pd.DataFrame):
    commit_dates = ticket_frame.CommitDate
    commit_periods = self.compute_periods(commit_dates)
    commit_periods = pd.concat(
        [pd.Series(data=[pd.Timedelta(days=0)]), commit_periods]).reset_index(drop=True)

    ticket_frame.insert(8, 'CommitPeriod', commit_periods.dt.days)

    return ticket_frame
def getMinutesFromMidnight(df, feature):
    time_deltas = pd.to_datetime(df[feature]) - pd.to_datetime(df['timestamp'])
    mins = [time / pd.Timedelta('1 minute') for time in time_deltas]
    return [time if not pd.isnull(time) else np.nan for time in mins]
def zero_pad_series(series):
    """Pad *series* with zeros up to the next power-of-two length."""
    N = len(series)
    next_log2 = math.ceil(math.log(N, 2))
    M = int(2**next_log2 - N)
    indices = [series.index[-1] + PD.Timedelta(seconds=x) for x in range(1, M + 1)]
    zero_series = PD.Series(data=NP.zeros(M), index=indices)
    return PD.concat([series, zero_series])
def parse_time_period(cls, period):
    """
    try to parse specified time period

    :param period: specified period
    """
    # catch single value
    if not isinstance(period, dict):
        period = dict(value=period)

    # try to parse specified period
    try:
        return pd.Timedelta(**period).delta
    except Exception as ex:
        cls.log().critical('unable to parse period: %s', str(period))
        raise ex
def comp_date(day):
    """Get date/time from day of year"""
    import pandas as pd
    return pd.Timestamp('1976-01-01') + pd.Timedelta('{:d}D'.format(day - 1))
def get_holiday_df(day):
    import datetime
    holiday_df = pd.DataFrame.from_csv(HOLIDAY_PATH)
    index_t = holiday_df.init_date.apply(lambda x: datetime.datetime.strptime(x[:10], '%Y/%m/%d'))
    holiday_df.pop('init_date')
    holiday_df = holiday_df.set_index(index_t)
    holiday_df.index += pd.Timedelta('%dD' % (30 + (day - 1)))
    #holiday_df = holiday_df.ix[:,day:30+day]
    holiday_df.columns = map(lambda x: 'festday#%d' % x, range(-30 - (day - 1), 31 - (day - 1) + 5))
    return holiday_df
def get_festday_df(day):
    import datetime
    holiday_df = pd.DataFrame.from_csv(FEST_PATH)
    index_t = holiday_df.init_date.apply(lambda x: datetime.datetime.strptime(x[:10], '%Y/%m/%d'))
    holiday_df.pop('init_date')
    holiday_df = holiday_df.set_index(index_t)
    holiday_df.index += pd.Timedelta('%dD' % (30 + (day - 1)))
    #holiday_df = holiday_df.ix[:,day:30+day]
    holiday_df.columns = map(lambda x: 'holiday#%d' % x, range(-30 - (day - 1), 31 - (day - 1) + 5))
    return holiday_df
def get_prophet_df(user_id):
    prophet_df = pd.DataFrame.from_csv(PROPHET_PATH + '%d.csv' % user_id)
    prophet_df.index = pd.to_datetime(prophet_df.ds)
    prophet_df = prophet_df[get_prophet_columns()]
    # predict 31 days
    new_df = pd.DataFrame(index=prophet_df.index[31:-3])
    for col in prophet_df.columns:
        t_col = prophet_df[col].copy()
        t_col.index += pd.Timedelta('3D')  # feature 3 days
        # predict 33 days
        for day in range(-3, 31 + 3):
            new_df[col + '#%d' % day] = t_col
            t_col.index -= pd.Timedelta('1D')
    return new_df.dropna()
def get_weather_df():
    weather_df = pd.DataFrame.from_csv(WEATHER_PATH)
    weather_df = weather_df[get_weather_columns()]
    # predict 30 days
    new_df = pd.DataFrame(index=weather_df.index[30:-88-3])
    for col in weather_df.columns:
        t_col = weather_df[col].copy()
        t_col.index += pd.Timedelta('3D')  # feature 7 days
        # predict 30 days
        for day in range(-30, 31 + 3):
            new_df[col + '#%d' % day] = t_col
            t_col.index -= pd.Timedelta('1D')
    return new_df.dropna()
def plotWeekly(dictframe, ax, uncertainty, weeklyStart, color='#0072B2'):

    if ax is None:
        figW = plt.figure(facecolor='w', figsize=(10, 6))
        ax = figW.add_subplot(111)
    else:
        figW = ax.get_figure()
    ##
    # Create a list of 7 days for the x axis of the plot
    ##
    days = (pd.date_range(start='2017-01-01', periods=7) +
            pd.Timedelta(days=weeklyStart))
    ##
    # Find the weekday seasonality values for each weekday
    ##
    weekdays = dictframe.ds.dt.weekday
    ind = []
    for weekday in range(7):
        ind.append(max(weekdays[weekdays == weekday].index.tolist()))
    ##
    # Plot only one weekday each
    ##
    ax.plot(range(len(days)), dictframe['weekly'][ind], ls='-', c=color)
    ##
    # Plot uncertainty if necessary
    ##
    if uncertainty:
        ax.fill_between(range(len(days)), dictframe['weekly_lower'][ind],
                        dictframe['weekly_upper'][ind], color=color, alpha=0.2)
    ax.grid(True, which='major', c='gray', ls='-', lw=1, alpha=0.2)
    ax.set_xticks(range(len(days)))
    ax.set_xticklabels(dictframe['ds'][ind].dt.weekday_name)
    ax.set_xlabel('Day of week')
    ax.set_ylabel('weekly')
    figW.tight_layout()
    return figW
def _get_min_acceptable_period(self):
    return pd.Timedelta('1 days')
def _get_min_acceptable_period(self):
    return pd.Timedelta('1 hours')
def date_range(schedule, frequency, closed='right', force_close=True, **kwargs):
    """
    Given a schedule, return a DatetimeIndex with all of the valid datetimes at the
    frequency given. The schedule values are assumed to be in UTC.

    :param schedule: schedule DataFrame
    :param frequency: frequency in standard string
    :param closed: same meaning as pandas date_range. 'right' will exclude the first
      value and should be used when the results should only include the close for each bar.
    :param force_close: if True then the close of the day will be included even if it
      does not fall on an even frequency. If False then the market close for the day
      may not be included in the results
    :param kwargs: arguments that will be passed to pandas date_range
    :return: DatetimeIndex
    """
    if pd.Timedelta(frequency) > pd.Timedelta('1D'):
        raise ValueError('Frequency must be 1D or higher frequency.')
    kwargs['closed'] = closed
    ranges = list()
    for row in schedule.itertuples():
        dates = pd.date_range(row.market_open, row.market_close,
                              freq=frequency, tz='UTC', **kwargs)
        if force_close:
            if row.market_close not in dates:
                dates = dates.insert(len(dates), row.market_close)
        ranges.append(dates)

    index = pd.DatetimeIndex([], tz='UTC')
    return index.union_many(ranges)
def days_at_time(days, t, tz, day_offset=0):
    """
    Create an index of days at time ``t``, interpreted in timezone ``tz``.
    The returned index is localized to UTC.

    In the example below, the times switch from 13:45 to 12:45 UTC because
    March 13th is the daylight savings transition for US/Eastern. All the
    times are still 8:45 when interpreted in US/Eastern.

    >>> import pandas as pd; import datetime; import pprint
    >>> dts = pd.date_range('2016-03-12', '2016-03-14')
    >>> dts_at_845 = days_at_time(dts, datetime.time(8, 45), 'US/Eastern')
    >>> pprint.pprint([str(dt) for dt in dts_at_845])
    ['2016-03-12 13:45:00+00:00',
     '2016-03-13 12:45:00+00:00',
     '2016-03-14 12:45:00+00:00']

    :param days: DatetimeIndex An index of dates (represented as midnight).
    :param t: datetime.time The time to apply as an offset to each day in ``days``.
    :param tz: pytz.timezone The timezone to use to interpret ``t``.
    :param day_offset: int The number of days we want to offset @days by
    :return: DatetimeIndex of date with the time t
    """
    if len(days) == 0:
        return pd.DatetimeIndex(days).tz_localize(tz).tz_convert('UTC')

    # Offset days without tz to avoid timezone issues.
    days = DatetimeIndex(days).tz_localize(None)
    delta = pd.Timedelta(
        days=day_offset,
        hours=t.hour,
        minutes=t.minute,
        seconds=t.second,
    )
    return (days + delta).tz_localize(tz).tz_convert('UTC')