Python pandas 模块,Timedelta() 实例源码


项目:zipline-chinese    作者:zhanghan1990    | 项目源码 | 文件源码
def offset(self, time_delta):
        """ Get the future chain for this root symbol with a given
        offset from the current as_of_date.

        Parameters
        ----------
        time_delta : datetime.timedelta or pandas.Timedelta or str
            The offset from the current as_of_date for the new chain.
            It is passed straight to ``Timedelta``, so anything that
            constructor accepts is allowed.

        Returns
        -------
        Whatever ``self.as_of`` returns for ``as_of_date + time_delta``.
        """
        return self.as_of(self.as_of_date + Timedelta(time_delta))

项目:tianchi_power    作者:lvniqi    | 项目源码 | 文件源码
def predict_tf_once(day,start_date = '2016-10-1'):
    """Predict the total for one day from the stored percent model.

    :param day: 1-based day number; selects the dataset and the
        ``percent_model/<day>.csv`` file.
    :param start_date: date label used to slice every dataset frame
        down to that single day.
    :return: tuple ``(day, real)`` where ``real`` is the rounded grand
        total of ``features * percent``.
    """
    user_ids = get_full_user_ids()
    # Build real lists: the datasets are iterated once per user and the
    # column names are used as an indexer, which a lazy map() cannot do.
    all_dataset = [frame.loc[start_date:start_date] for frame in get_dataset(day)]
    y_p_features = [
        tf_percent_model.resample_x_y_(all_dataset, user_id)[0].reshape(-1)
        for user_id in user_ids
    ]
    y_p_features_df = pd.DataFrame(y_p_features, index=user_ids)

    # read_csv(index_col=0, parse_dates=True) is the documented
    # replacement for the removed DataFrame.from_csv.
    percent = pd.read_csv(
        './features/tensorflow_model/percent_model/%d.csv' % day,
        index_col=0, parse_dates=True)
    percent = percent[['percent#%d' % i for i in range(_feature_length)]]

    # Compute the weighted features once; the per-user frame the old
    # code also built from it was never used.
    weighted = np.array(y_p_features_df) * percent
    real = int(np.round(weighted.sum().sum()))
    print((day, real))
    return (day, real)
项目:tianchi_power    作者:lvniqi    | 项目源码 | 文件源码
def make_month_features(holiday_df):
    """Build lag/lead feature frames for the holiday and festday flags.

    For each of the columns ``'holiday'`` and ``'festday'`` a frame is
    built whose column ``'<col>#-k'`` (k = 1..30) holds the value from k
    days earlier and whose column ``'<col>#k'`` (k = 0..32) holds the
    value from k days later. Rows are restricted to 2015-01-01 through
    2016-12-31.

    :param holiday_df: frame with a date index and ``holiday`` /
        ``festday`` columns.
    :return: list with one feature frame per source column, in the order
        ``['holiday', 'festday']``.
    """
    one_day = pd.Timedelta('1D')
    df_list = []
    for cols in ['holiday','festday']:
        new_df = pd.DataFrame(index = holiday_df.index)
        holi = holiday_df[cols].copy()
        holi_new = holi.copy()
        # 30 lagged copies: moving the index forward makes date t see t-k
        for d in range(30):
            holi_new.index = holi_new.index + one_day
            new_df['%s#-%d'%(cols,d+1)] = holi_new
        # lead copies: moving the index backward makes date t see t+k
        for d in range(31+3):
            new_df['%s#%d'%(cols,d)] = holi
            holi.index = holi.index - one_day
        # A concrete list is required here; a lazy map() is not a valid key.
        new_df = new_df[['%s#%d'%(cols,day) for day in range(-30,30+3)]]
        new_df = new_df.loc['2015-1-1':'2016-12-31']
        # Previously the frame was dropped and an empty list was returned.
        df_list.append(new_df)
    return df_list
项目:tianchi_power    作者:lvniqi    | 项目源码 | 文件源码
def make_features(user_id,user_df):
    # Builds 60 lagged copies of the power_consumption column for one user.
    # NOTE(review): nothing is returned (the only return is commented out
    # below), so callers receive None -- confirm whether lines were lost.
    print 'user_id:', user_id
    power = user_df.power_consumption
    assert power.index[0] == user_df.index[0]
    # hard requirement of exactly 639 rows in the input frame
    assert len(user_df.index) == 639
    # extend the index with every day of September 2016
    new_df = pd.DataFrame(index=user_df.index.union(pd.date_range('2016-9-1','2016-9-30')))
    pw_new = power.copy()
    #predict 30 days and 30days for features
    for d in range(60):
        # moving the index forward one day per pass makes column
        # 'power#-k' hold the value observed k days earlier
        pw_new.index += pd.Timedelta('1D')
        new_df['power#-%d'%(d+1)] = pw_new
    #create 30 models
    for d in range(30):
        #30 days features
        # window of 30 consecutive lag columns starting at offset d
        x_ = new_df[new_df.columns[d:30+d]]
        # NOTE(review): x_ is overwritten on every pass and never used
        x_['y'] = power

    #return x_
项目:tianchi_power    作者:lvniqi    | 项目源码 | 文件源码
def make_month_features(user_id,user_df):
    """Build 30 lag features and 31 lead targets of power consumption.

    The result is indexed by the union of ``user_df.index`` and every day
    of October 2016. Column ``'power#-k'`` (k = 1..30) holds the
    consumption k days earlier; column ``'y#k'`` (k = 0..30) holds the
    consumption k days later. Dates with no source value are NaN.

    :param user_id: only used for the progress message.
    :param user_df: frame with a date index and a ``power_consumption``
        column.
    :return: the feature/target DataFrame.
    """
    # Formatted print call: same output text, valid on Python 2 and 3.
    print('user_id: %s' % (user_id,))
    one_day = pd.Timedelta('1D')
    power = user_df.power_consumption.copy()
    assert power.index[0] == user_df.index[0]
    new_df = pd.DataFrame(index=user_df.index.union(pd.date_range('2016-10-1','2016-10-31')))
    lagged = power.copy()
    # moving the index forward makes date t see the value from t-k
    for d in range(30):
        lagged.index = lagged.index + one_day
        new_df['power#-%d'%(d+1)] = lagged
    # moving the index backward makes date t see the value from t+k
    for d in range(31):
        new_df['y#%d'%d] = power
        power.index = power.index - one_day
    return new_df
项目:tianchi_power    作者:lvniqi    | 项目源码 | 文件源码
def make_history_month_features(user_id,user_df):
    """Replicate the history features at 30 day offsets.

    ``history_feature`` is applied to the user's power consumption; for
    every resulting column ``c`` and every ``d`` in 0..29 the output
    column ``'c#d'`` holds the value of ``c`` from d days later. Rows
    with any missing value are dropped.

    :param user_id: only used for the progress message.
    :param user_df: frame with a ``power_consumption`` column.
    :return: the feature DataFrame.
    """
    # Formatted print call: same output text, valid on Python 2 and 3.
    print('user_id: %s' % (user_id,))
    one_day = pd.Timedelta('1D')
    power = user_df.power_consumption.copy()
    feature_df = history_feature(power)
    new_df = pd.DataFrame(index = feature_df.index)
    for d in range(30):
        for cols in feature_df:
            new_df[cols+'#%d'%d] = feature_df[cols]
        # shift back one day so the next pass aligns t with t+(d+1)
        feature_df.index = feature_df.index - one_day
    return new_df.dropna()
项目:combine    作者:llllllllll    | 项目源码 | 文件源码
def train(obj, user, replays, age):
    """Manually train the model for a given user.
    import os
    import pickle

    import pandas as pd
    from slider import Library, Client
    from slider.model import train_from_replay_directory

    if age is not None:
        age = pd.Timedelta(age)

    m = train_from_replay_directory(
        client=Client(Library(obj.maps), obj.api_key),
    with open(os.path.join(obj.models, user), 'wb') as f:
        pickle.dump(m, f)
项目:combine    作者:llllllllll    | 项目源码 | 文件源码
def gen_token(token_secret, user):
    """Generate a token for a user.

    token_secret : secret
        The secret to encrypt with.
    user : str
        The user to make a token for.

    token : str
        The encrypted token.
    now ='utc')
    return token_secret.encrypt(
            'issued': now.isoformat(),
            'expires': (now + pd.Timedelta(hours=12)).isoformat(),
            'user': user,
项目:catalyst    作者:enigmampc    | 项目源码 | 文件源码
def should_trigger(self, dt):
        if is None or dt >= self.next_date:
            # initialize or reset for new date
            self.triggered = False
   = dt

            # record the timestamp for the next day, so that we can use it
            # to know if we've moved to the next day
            self.next_date = dt + pd.Timedelta(1, unit="d")

        if not self.triggered and self.rule.should_trigger(dt):
            self.triggered = True
            return True

# Factory API
项目:strategy    作者:kanghua309    | 项目源码 | 文件源码
def _update(stock, conn):
        print "update ----- :", stock
        query = "select * from '%s' order by date" % stock
        df = pd.read_sql(query, conn)
        df = df.set_index('date')

        print "sql saved:", df.tail(1),df.ix[-1],df.ix[-1].name
        if == 5:
            today = str(pd.Timestamp( - pd.Timedelta(days=1))[:10]
        elif == 6:
            today = str(pd.Timestamp( - pd.Timedelta(days=2))[:10]
            today = str(pd.Timestamp([:10]
        print "today:",today
        if today != df.ix[-1].name[:10]:
            df = ts.get_h_data(stock, start=df.ix[-1].name[:10], retry_count=5, pause=1)
            print "read from tu:",df.head(1)
            df[['open', 'high', 'close', 'low', 'volume']].to_sql(stock, conn, if_exists='append')
            import time
    except Exception, arg:
        print "exceptionu:", stock, arg
项目:fakestockdata    作者:mrocklin    | 项目源码 | 文件源码
def generate_stock(fn, directory=None, freq=pd.Timedelta(seconds=60),
    start = pd.Timestamp(start)
    directory = directory or os.path.join('data', 'generated')
    fn2 = os.path.split(fn)[1]
    sym = fn2[len('table_'):fn2.find('.csv')]
    if not os.path.exists(directory):
    if not os.path.exists(os.path.join(directory, sym)):
        os.mkdir(os.path.join(directory, sym))

    df = load_file(fn)
    for date, rec in df.to_dict(orient='index').items():
        if start <= pd.Timestamp(date) <= end:
            df2 = generate_day(date, freq=freq, **rec)
            fn2 = os.path.join(directory, sym, str(date).replace(' ', 'T') + '.csv')
    print('Finished %s' % sym)
项目:PyDataLondon29-EmbarrassinglyParallelDAWithAWSLambda    作者:SignalMedia    | 项目源码 | 文件源码
def test_add_iadd(self):

        # only test adding/sub offsets as + is now numeric

        # offset
        offsets = [pd.offsets.Hour(2), timedelta(hours=2),
                   np.timedelta64(2, 'h'), Timedelta(hours=2)]

        for delta in offsets:
            rng = timedelta_range('1 days', '10 days')
            result = rng + delta
            expected = timedelta_range('1 days 02:00:00', '10 days 02:00:00',
            tm.assert_index_equal(result, expected)
            rng += delta
            tm.assert_index_equal(rng, expected)

        # int
        rng = timedelta_range('1 days 09:00:00', freq='H', periods=10)
        result = rng + 1
        expected = timedelta_range('1 days 10:00:00', freq='H', periods=10)
        tm.assert_index_equal(result, expected)
        rng += 1
        tm.assert_index_equal(rng, expected)
项目:PyDataLondon29-EmbarrassinglyParallelDAWithAWSLambda    作者:SignalMedia    | 项目源码 | 文件源码
def test_overflow(self):
        # GH 9442
        s = Series(pd.date_range('20130101', periods=100000, freq='H'))
        s[0] += pd.Timedelta('1s 1ms')

        # mean
        result = (s - s.min()).mean()
        expected = pd.Timedelta((pd.DatetimeIndex((s - s.min())).asi8 / len(s)

        # the computation is converted to float so might be some loss of
        # precision
        self.assertTrue(np.allclose(result.value / 1000, expected.value /

        # sum
        self.assertRaises(ValueError, lambda: (s - s.min()).sum())
        s1 = s[0:10000]
        self.assertRaises(ValueError, lambda: (s1 - s1.min()).sum())
        s2 = s[0:1000]
        result = (s2 - s2.min()).sum()
项目:PyDataLondon29-EmbarrassinglyParallelDAWithAWSLambda    作者:SignalMedia    | 项目源码 | 文件源码
def test_to_timedelta_on_nanoseconds(self):
        """Check Timedelta construction and arithmetic at nanosecond resolution."""
        # GH 9273
        # keyword and string spellings of 100 ns must compare equal
        result = Timedelta(nanoseconds=100)
        expected = Timedelta('100ns')
        self.assertEqual(result, expected)

        # every keyword at once: 1 week + 1 day + 1 h + 1 min + 1 s
        # = 694861 s, plus 1 ms + 1 us + 1 ns, expressed in nanoseconds
        result = Timedelta(days=1, hours=1, minutes=1, weeks=1, seconds=1,
                           milliseconds=1, microseconds=1, nanoseconds=1)
        expected = Timedelta(694861001001001)
        self.assertEqual(result, expected)

        # addition keeps the nanosecond component
        result = Timedelta(microseconds=1) + Timedelta(nanoseconds=1)
        expected = Timedelta('1us1ns')
        self.assertEqual(result, expected)

        # subtraction: 1000 ns - 1 ns
        result = Timedelta(microseconds=1) - Timedelta(nanoseconds=1)
        expected = Timedelta('999ns')
        self.assertEqual(result, expected)

        # scalar multiplication of a negative delta: 1000 ns - 10 ns
        result = Timedelta(microseconds=1) + 5 * Timedelta(nanoseconds=-2)
        expected = Timedelta('990ns')
        self.assertEqual(result, expected)

        # a non-numeric nanoseconds value must be rejected
        self.assertRaises(TypeError, lambda: Timedelta(nanoseconds='abc'))
项目:PyDataLondon29-EmbarrassinglyParallelDAWithAWSLambda    作者:SignalMedia    | 项目源码 | 文件源码
def test_timedelta_hash_equality(self):
        # GH 11129
        v = Timedelta(1, 'D')
        td = timedelta(days=1)
        self.assertEqual(hash(v), hash(td))

        d = {td: 2}
        self.assertEqual(d[v], 2)

        tds = timedelta_range('1 second', periods=20)
        self.assertTrue(all(hash(td) == hash(td.to_pytimedelta()) for td in

        # python timedeltas drop ns resolution
        ns_td = Timedelta(1, 'ns')
        self.assertNotEqual(hash(ns_td), hash(ns_td.to_pytimedelta()))
项目:PyDataLondon29-EmbarrassinglyParallelDAWithAWSLambda    作者:SignalMedia    | 项目源码 | 文件源码
def test_get_indexer(self):
        idx = pd.date_range('2000-01-01', periods=3)
        tm.assert_numpy_array_equal(idx.get_indexer(idx), [0, 1, 2])

        target = idx[0] + pd.to_timedelta(['-1 hour', '12 hours',
                                           '1 day 1 hour'])
        tm.assert_numpy_array_equal(idx.get_indexer(target, 'pad'), [-1, 0, 1])
            idx.get_indexer(target, 'backfill'), [0, 1, 2])
            idx.get_indexer(target, 'nearest'), [0, 1, 1])
            idx.get_indexer(target, 'nearest',
                            tolerance=pd.Timedelta('1 hour')),
            [0, -1, 1])
        with tm.assertRaises(ValueError):
            idx.get_indexer(idx[[0]], method='nearest', tolerance='foo')
项目:PyDataLondon29-EmbarrassinglyParallelDAWithAWSLambda    作者:SignalMedia    | 项目源码 | 文件源码
def test_get_loc(self):
        idx = pd.to_timedelta(['0 days', '1 days', '2 days'])

        for method in [None, 'pad', 'backfill', 'nearest']:
            self.assertEqual(idx.get_loc(idx[1], method), 1)
            self.assertEqual(idx.get_loc(idx[1].to_pytimedelta(), method), 1)
            self.assertEqual(idx.get_loc(str(idx[1]), method), 1)

            idx.get_loc(idx[1], 'pad', tolerance=pd.Timedelta(0)), 1)
            idx.get_loc(idx[1], 'pad', tolerance=np.timedelta64(0, 's')), 1)
        self.assertEqual(idx.get_loc(idx[1], 'pad', tolerance=timedelta(0)), 1)

        with tm.assertRaisesRegexp(ValueError, 'must be convertible'):
            idx.get_loc(idx[1], method='nearest', tolerance='foo')

        for method, loc in [('pad', 1), ('backfill', 2), ('nearest', 1)]:
            self.assertEqual(idx.get_loc('1 day 1 hour', method), loc)
项目:PyDataLondon29-EmbarrassinglyParallelDAWithAWSLambda    作者:SignalMedia    | 项目源码 | 文件源码
def test_column_dups_indexing2(self):
        """Check column subtraction on frames whose index has duplicate labels."""

        # GH 8363
        # datetime ops with a non-unique index
        # integer case: B is A + 1 everywhere, so the difference is all ones
        df = DataFrame({'A': np.arange(5, dtype='int64'),
                        'B': np.arange(1, 6, dtype='int64')},
                       index=[2, 2, 3, 3, 4])
        result = df.B - df.A
        expected = Series(1, index=[2, 2, 3, 3, 4])
        assert_series_equal(result, expected)

        # datetime case: B starts 9 hours after A, so every difference
        # is a 9 hour Timedelta
        df = DataFrame({'A': date_range('20130101', periods=5),
                        'B': date_range('20130101 09:00:00', periods=5)},
                       index=[2, 2, 3, 3, 4])
        result = df.B - df.A
        expected = Series(pd.Timedelta('9 hours'), index=[2, 2, 3, 3, 4])
        assert_series_equal(result, expected)
项目:MetSim    作者:UW-Hydro    | 项目源码 | 文件源码
def _unpack_state(self, result: pd.DataFrame, locs: dict):
        """Put restart values in the state dataset"""
        # We concatenate with the old state values in case we don't
        # have 90 new days to use
        tmin = np.concatenate((self.state['t_min'].sel(**locs).values[:],
        tmax = np.concatenate((self.state['t_max'].sel(**locs).values[:],
        prec = np.concatenate((self.state['prec'].sel(**locs).values[:],
        self.state['t_min'].sel(**locs).values[:] = tmin[-90:]
        self.state['t_max'].sel(**locs).values[:] = tmax[-90:]
        self.state['prec'].sel(**locs).values[:] = prec[-90:]
        self.state['swe'].sel(**locs).values = result['swe'].values[-1]
        state_start = result.index[-1] - pd.Timedelta('89 days')
        self.state.time.values = date_range(state_start, result.index[-1],
项目:mars_express    作者:wsteitz    | 项目源码 | 文件源码
def event_to_min_per_hour(df, event):
    """Convert event intervals into minutes of activity per clock hour.

    :param df: frame with exactly three columns in the order start
        timestamp, end timestamp, event name; the name column must be
        called ``event``.
    :param event: pattern passed to ``str.contains`` to select rows.
    :return: frame indexed by hour (index name ``ut_ms``) with a single
        column ``'<event>_mins'``; hours without activity are 0.
    """
    one_hour = pd.Timedelta("1h")

    def hourly(start, end):
        # Minutes left in the hour that contains ``start``.
        first_hour = start.floor("1h")
        ret = [(first_hour, 60 - start.minute)]
        # Continue from the hour AFTER the first one. Using
        # start.ceil() here counted the first hour twice whenever
        # ``start`` fell exactly on the hour (floor == ceil).
        t = first_hour + one_hour
        while t <= end:
            ret.append((t, 60))
            t += one_hour
        # The hour containing ``end`` was credited in full above; take
        # back the minutes after ``end``.
        ret.append((end.floor("1h"), end.minute - 60))
        return ret

    df = df[df.event.str.contains(event)]

    res = []
    for i, (start, end, _) in df.iterrows():
        res += hourly(start, end)

    df = pd.DataFrame(res)
    df.columns = ['ut_ms', event + "_mins"]

    df = df.set_index('ut_ms')
    # Summing per hour merges the partial credits and corrections.
    df = df.resample("1h").sum().fillna(0.0)
    return df
项目:prophet    作者:facebook    | 项目源码 | 文件源码
def test_simulated_historical_forecasts(self):
        m = Prophet()
        k = 2
        for p in [1, 10]:
            for h in [1, 3]:
                period = '{} days'.format(p)
                horizon = '{} days'.format(h)
                df_shf = diagnostics.simulated_historical_forecasts(
                    m, horizon=horizon, k=k, period=period)
                # All cutoff dates should be less than ds dates
                self.assertTrue((df_shf['cutoff'] < df_shf['ds']).all())
                # The unique size of output cutoff should be equal to 'k'
                self.assertEqual(len(np.unique(df_shf['cutoff'])), k)
                    max(df_shf['ds'] - df_shf['cutoff']),
                dc = df_shf['cutoff'].diff()
                dc = dc[dc > pd.Timedelta(0)].min()
                self.assertTrue(dc >= pd.Timedelta(period))
                # Each y in df_shf and self.__df with same ds should be equal
                df_merged = pd.merge(df_shf, self.__df, 'left', on='ds')
                    np.sum((df_merged['y_x'] - df_merged['y_y']) ** 2), 0.0)
项目:qutils    作者:Raychee    | 项目源码 | 文件源码
def ensure_timestamps(timestamps, func_get_latest_time=None, if_fail='ignore'):
    t_is_not_timestamp = [t is None or isinstance(t, (str, pd.Timedelta, timedelta))
                          for t in timestamps]
    if any(t_is_not_timestamp):
        if t_is_not_timestamp[-1]:
            last_timestamp = to_datetime(timestamps[-1], from_datetime=func_get_latest_time,
            if last_timestamp is None:
                if if_fail == 'ignore':
                    return []
                elif if_fail == 'raise':
                    raise ValueError('Cannot convert timestamps {!r}'.format(timestamps))
                    raise NotImplementedError('Unsupported handling method when fail: {}'
            timestamps[-1] = last_timestamp
            last_timestamp = timestamps[-1]
        timestamps[:-1] = [to_datetime(t, from_datetime=last_timestamp) if is_not_timestamp else t
                           for t, is_not_timestamp in zip(timestamps[:-1], t_is_not_timestamp[:-1])]
    # if all(t == timestamps[0] for t in timestamps[1:]):
    #     timestamps = [timestamps[0]]
    return timestamps
项目:JDcontest    作者:zsyandjyhouse    | 项目源码 | 文件源码
def get_comment_product_fea(endtime):
    """Build per-product comment features for the week ending at ``endtime``.

    :param endtime: end date as a ``'%Y-%m-%d'`` string.
    :return: for ``'2016-04-15'`` the raw comment rows of that date
        (``sku_id``, ``comment_num``, ``has_bad_comment``,
        ``bad_comment_rate``) sorted by ``sku_id``; for any other date
        the rows with ``endtime - 7 days < dt <= endtime`` with
        ``comment_num`` one-hot encoded, sorted by ``sku_id``.
    """
    enddt = pd.to_datetime(endtime,format = '%Y-%m-%d')
    # read the comment table from FilePath + CommentFile
    commentdata = pd.read_csv(FilePath + CommentFile)

    if enddt == pd.to_datetime('2016-04-15',format = '%Y-%m-%d'):
        commentdata = commentdata[(commentdata["dt"] == "2016-04-15")]
        commentdata = commentdata.sort_values(by="sku_id").reset_index()[["sku_id", "comment_num", "has_bad_comment", "bad_comment_rate"]]
        return commentdata

    # This branch used to sit after the ``return`` inside the ``if``
    # body and could never run, so every other date returned None.
    startdt = enddt - pd.Timedelta(days=7)
    # convert the dt column to datetimes so it can be compared
    commentdata['dt'] = pd.to_datetime(commentdata['dt'], format='%Y-%m-%d')
    comment = commentdata[(commentdata['dt'] <= enddt) & (commentdata['dt'] > startdt)]
    df = pd.get_dummies(comment['comment_num'], prefix='comment_num')
    comment = pd.concat([comment, df], axis=1)
    # NOTE(review): raises KeyError when one of the levels 1-4 does not
    # occur inside the window -- confirm whether that can happen.
    comment = comment[['sku_id', 'has_bad_comment', 'bad_comment_rate', 'comment_num_1', 'comment_num_2', 'comment_num_3','comment_num_4']]
    # reset_index(drop=True) == reset_index().drop('index', axis=1)
    sorted_comment = comment.sort_values(by=['sku_id']).reset_index(drop=True)
    return sorted_comment

# ????????
项目:Uber-DS-Challenge    作者:bjherger    | 项目源码 | 文件源码
def extract_days(input_delta):
    """
    Helper function to extract the number of days from a time delta. Returns:
     - Number of days, if valid time delta
     - np.nan if time delta is null or invalid

    :param input_delta: anything ``pandas.Timedelta`` accepts
    :return: number of days in time delta
    :rtype: int for a valid delta, float NaN otherwise
    """
    # Attempt to coerce into Pandas time delta. An unparseable value
    # yields NaN, as documented, instead of propagating the error.
    try:
        delta = pd.Timedelta(input_delta)
    except (ValueError, TypeError):
        return np.nan

    # Attempt to extract number of days (NaT has no usable day count)
    if pd.notnull(delta):
        return delta.days
    return np.nan
项目:perfume    作者:leifwalsh    | 项目源码 | 文件源码
def test_timings_in_context(self):
        '''Test that timings_in_context gives us the right results.'''
        in_context = analyze.timings_in_context(self.samples)
        # Since each "function" has a fixed frequency, we can create
        # two series with TimedeltaIndexes and align them into the
        # same DataFrame, which should be what timings_in_context
        # gives us.
        fn1_expected = pd.Series(
                start='1.1s', periods=20, name='time'))
        fn2_expected = pd.Series(
                start='1.5s', periods=20, name='time'))
        expected = pd.DataFrame({
            'fn1': fn1_expected,
            'fn2': fn2_expected
        pdt.assert_frame_equal(in_context, expected)
项目:mizani    作者:has2k1    | 项目源码 | 文件源码
def test_timedelta_format():
    """Check the labels produced by timedelta_format for several inputs."""
    # datetime.timedelta input in multiples of 7 days, default units:
    # labels come out in weeks
    x = [timedelta(days=7*i) for i in range(5)]
    labels = timedelta_format()(x)
    assert labels == ['0', '1 week', '2 weeks', '3 weeks', '4 weeks']

    # pandas Timedelta input in multiples of 600 s, default units:
    # labels come out in minutes
    x = [pd.Timedelta(seconds=600*i) for i in range(5)]
    labels = timedelta_format()(x)
    assert labels == \
        ['0', '10 minutes', '20 minutes', '30 minutes', '40 minutes']

    # specific units
    # same values forced to hours: fractional labels with 4 decimals
    labels = timedelta_format(units='h')(x)
    assert labels == \
        ['0', '0.1667 hours', '0.3333 hours', '0.5000 hours',
         '0.6667 hours']
    # usetex
    # microsecond unit is rendered with a TeX mu
    x = [timedelta(microseconds=7*i) for i in range(5)]
    labels = timedelta_format(units='us', usetex=True)(x)
    assert labels == \
        ['0', '7$\\mu s$', '14$\\mu s$', '21$\\mu s$', '28$\\mu s$']
项目:blotter    作者:matthewgilbert    | 项目源码 | 文件源码
def test_create_pnl_sweep_no_event_open_pnl_only(self):
        blt = blotter.Blotter(self.prices, self.rates, base_ccy="USD",
        ts = pd.Timestamp('2015-08-03T12:00:00')
        pos = 1
        blt.define_generic("SXM", "CAD", 0, 1, 0)
        blt.map_instrument("SXM", "SXMZ15"), 'SXMZ15', pos, price=800, ntc_price=800)
        ts = pd.Timestamp('2015-08-04T00:00:00')
        evs = blt.create_events(ts, "PNL_SWEEP")
        evs_exp = []
        self.assertEventsEqual(evs, evs_exp)
项目:blotter    作者:matthewgilbert    | 项目源码 | 文件源码
def test_automatic_events_future_type_creation(self):
        blt = blotter.Blotter(self.prices, self.rates,
                              accrual_time=pd.Timedelta(0, unit='h'),
                              eod_time=pd.Timedelta(0, unit='h'),
                              sweep_time=pd.Timedelta(0, unit='h'))
        blt.define_generic("ES", "USD", 0.1, 100, 2.50)
        blt.map_instrument("ES", "ESZ15")
        ts = pd.Timestamp('2015-08-04T10:00:00')
        number_instr = 1
        blt._trade(ts, "ESZ15", number_instr, 2000)

        ev_types = []
        for ev in blt.event_log:

        ev_types_exp = ["TRADE", "INTEREST", "PNL"]
        self.assertEqual(ev_types, ev_types_exp)
项目:blotter    作者:matthewgilbert    | 项目源码 | 文件源码
def test_automatic_events_closed_pnl_mark(self):
        blt = blotter.Blotter(self.prices, self.rates,
                              accrual_time=pd.Timedelta(0, unit='h'),
                              eod_time=pd.Timedelta(0, unit='h'),
                              sweep_time=pd.Timedelta(0, unit='h'))
        blt.define_generic("ES", ccy="USD", margin=0, multiplier=1,
                           commission=0, isFX=False)
        blt.map_instrument("ES", "ESZ15")

        ts = pd.Timestamp("2015-08-04T11:00:00"), "ESZ15", 1, 2000)

        ts = pd.Timestamp("2015-08-04T12:00:00")
        hlds = blt.get_instruments()
        for instr, qty in hlds.iteritems():
  , instr, -qty, 2001)

        ts = pd.Timestamp("2015-08-05T00:00:00")
        pnl_history = blt.get_pnl_history()
        usd = pd.DataFrame([[1.0, 1.0, 0.0]], index=[ts],
                           columns=["pnl", "closed pnl", "open pnl"])
        pnl_history_exp = {"USD": usd}
        self.assertDictDataFrameEqual(pnl_history, pnl_history_exp)
项目:guacml    作者:guacml    | 项目源码 | 文件源码
def analyze_frequency_for_group(df, date_col, group_cols):
    dates_and_keys = df[[date_col] + group_cols].sort_values(date_col)
    shifted = dates_and_keys.groupby(group_cols)[date_col].shift(1)

    diffs = (dates_and_keys[date_col] - shifted)
    diff_value_counts = diffs.value_counts()
    frequency = diff_value_counts.index[0]
    for diff, count in diff_value_counts.iteritems():
        if frequency == pd.Timedelta(0):
            raise ValueError('Many duplicate dates found in time series. If these dates belong to '
                             'different series, specify the key for the series in'
                             'make_time_series with the parameter series_key_col.')
        if diff % frequency != pd.Timedelta(0):
            raise ValueError('Can not determine frequency of time series. Found gap of length {}, '
                             'which is not a multiple of the assumed frequency of {}'
                             .format(diff, frequency))
    return frequency
项目:guacml    作者:guacml    | 项目源码 | 文件源码
def cv_splits(self, input):
        """Create time-ordered cross validation splits.

        Starting at the latest date in ``input[self.date_split_col]``,
        ``self.n_folds`` consecutive windows of ``self.prediction_length``
        days are cut off going backwards in time. For each window
        ``[left, right)`` the training rows are those dated before
        ``left`` and the validation rows those inside the window.

        :param input: DataFrame containing the date split column.
        :return: list of ``[train_index_values, cv_index_values]``
            pairs, latest window first.
        :raises Exception: if the training period of the earliest fold
            is shorter than the prediction length.
        """
        dates = input[self.date_split_col]
        one_window = pd.Timedelta(days=self.prediction_length)

        left = dates.max()
        split_points = []
        for i in range(self.n_folds):
            right = left
            left = left - one_window
            split_points.append((left, right))

        # Validate against the EARLIEST fold (last entry): it has the
        # shortest training period. The latest fold was checked before,
        # so adding folds could never trigger this error even though
        # the message tells the user to reduce them.
        if split_points[-1][0] - dates.min() < one_window:
            raise Exception('Training set is shorter than the prediction length. Use fewer '
                            'cross validation folds or a shorter prediction length')

        split_indices = []
        for left, right in split_points:
            train = input[dates < left]
            # NOTE(review): ``right`` is exclusive, so rows dated exactly
            # dates.max() never land in a validation set -- confirm intended.
            cv = input[(dates >= left) & (dates < right)]
            split_indices.append([train.index.values, cv.index.values])

        return split_indices
项目:zipline-chinese    作者:zhanghan1990    | 项目源码 | 文件源码
def make_commodity_future_info(first_sid,
    Make futures testing data that simulates the notice/expiration date
    behavior of physical commodities like oil.

    first_sid : int
    root_symbols : list[str]
    years : list[int]
    month_codes : dict[str -> int]

    Expiration dates are on the 20th of the month prior to the month code.
    Notice dates are are on the 20th two months prior to the month code.
    Start dates are one year before the contract month.

    See Also
    nineteen_days = pd.Timedelta(days=19)
    one_year = pd.Timedelta(days=365)
    return make_future_info(
        notice_date_func=lambda dt: dt - MonthBegin(2) + nineteen_days,
        expiration_date_func=lambda dt: dt - MonthBegin(1) + nineteen_days,
        start_date_func=lambda dt: dt - one_year,
项目:zipline-chinese    作者:zhanghan1990    | 项目源码 | 文件源码
def test_offset(self):
        """ Test the offset method of FutureChain.
        cl = FutureChain(self.asset_finder, lambda: '2005-12-01', 'CL')

        # Test that an offset forward sets as_of_date as expected
            cl.offset('3 days').as_of_date,
            cl.as_of_date + pd.Timedelta(days=3)

        # Test that an offset backward sets as_of_date as expected, with
        # time delta given as str, datetime.timedelta, and pd.Timedelta.
            cl.offset('-1000 days').as_of_date,
            cl.as_of_date + pd.Timedelta(days=-1000)
            cl.as_of_date + pd.Timedelta(days=-1000)
            cl.offset(pd.Timedelta('-1000 days')).as_of_date,
            cl.as_of_date + pd.Timedelta(days=-1000)

        # An offset of zero should give the original chain.
        self.assertEqual(cl[0], cl.offset(0)[0])
        self.assertEqual(cl[0], cl.offset("0 days")[0])

        # A string that doesn't represent a time delta should raise a
        # ValueError.
        with self.assertRaises(ValueError):
项目:zipline-chinese    作者:zhanghan1990    | 项目源码 | 文件源码
def test_cached_object(self):
        expiry = Timestamp('2014')
        before = expiry - Timedelta('1 minute')
        after = expiry + Timedelta('1 minute')

        obj = CachedObject(1, expiry)

        self.assertEqual(obj.unwrap(before), 1)
        self.assertEqual(obj.unwrap(expiry), 1)  # Unwrap on expiry is allowed.
        with self.assertRaises(Expired) as e:
        self.assertEqual(e.exception.args, (expiry,))
项目:saapy    作者:ashapochka    | 项目源码 | 文件源码
def compute_commit_periods(self, ticket_frame: pd.DataFrame):
        commit_dates = ticket_frame.CommitDate
        commit_periods = self.compute_periods(commit_dates)
        commit_periods = pd.concat(
        ticket_frame.insert(8, 'CommitPeriod', commit_periods.dt.days)
        return ticket_frame
项目:PersonalizedMultitaskLearning    作者:mitmedialab    | 项目源码 | 文件源码
def getMinutesFromMidnight(df, feature):
    """Return ``df[feature] - df['timestamp']`` in minutes, one float per row.

    Both columns are parsed with ``pd.to_datetime``. Rows where the
    difference is missing yield NaN.
    NOTE(review): the name implies ``timestamp`` holds midnight of the
    same day -- confirm against the caller.

    :param df: DataFrame with a ``timestamp`` column and ``feature``.
    :param feature: name of the datetime-like column to convert.
    :return: list of floats (minutes), NaN for missing values.
    """
    time_deltas = pd.to_datetime(df[feature]) - pd.to_datetime(df['timestamp'])
    # One vectorised division replaces two Python-level passes; NaT
    # becomes NaN in the float result.
    minutes = time_deltas / pd.Timedelta('1 minute')
    return minutes.tolist()
项目:PersonalizedMultitaskLearning    作者:mitmedialab    | 项目源码 | 文件源码
def getMinutesFromMidnight(df, feature):
    """Return ``df[feature] - df['timestamp']`` in minutes, one float per row.

    Both columns are parsed with ``pd.to_datetime``. Rows where the
    difference is missing yield NaN.
    NOTE(review): the name implies ``timestamp`` holds midnight of the
    same day -- confirm against the caller.

    :param df: DataFrame with a ``timestamp`` column and ``feature``.
    :param feature: name of the datetime-like column to convert.
    :return: list of floats (minutes), NaN for missing values.
    """
    time_deltas = pd.to_datetime(df[feature]) - pd.to_datetime(df['timestamp'])
    # One vectorised division replaces two Python-level passes; NaT
    # becomes NaN in the float result.
    minutes = time_deltas / pd.Timedelta('1 minute')
    return minutes.tolist()
项目:pyrsss    作者:butala    | 项目源码 | 文件源码
def zero_pad_series(series):
    """Append zeros so the series length becomes the next power of two.

    The padding samples are indexed at 1 second steps after the last
    existing index value.
    NOTE(review): this presumes a datetime-like index sampled at 1 s --
    confirm against the caller.

    :param series: non-empty pandas Series.
    :return: a new Series of power-of-two length.
    :raises ValueError: if ``series`` is empty.
    """
    N = len(series)
    if N == 0:
        # math.log(0, 2) raised ValueError here before; keep the type.
        raise ValueError('cannot zero-pad an empty series')
    # Exact integer arithmetic: ceil(log(N, 2)) suffers from float
    # rounding (log(2**29, 2) == 29.000000000000004) and would double
    # the length of an input that is already a power of two.
    M = (1 << (N - 1).bit_length()) - N
    if M == 0:
        return series.copy()
    indices = [series.index[-1] + PD.Timedelta(seconds=x) for x in range(1, M + 1)]
    zero_series = PD.Series(data=NP.zeros(M), index=indices)
    return PD.concat([series, zero_series])
项目:Eskapade    作者:KaveIO    | 项目源码 | 文件源码
def parse_time_period(cls, period):
        """Try to parse the specified time period.

        :param period: either a dict of keyword arguments for
            ``pandas.Timedelta`` or a single value that is passed as its
            ``value`` argument
        :returns: length of the period in nanoseconds (int)
        :raises Exception: whatever ``pandas.Timedelta`` raised; it is
            logged at critical level and re-raised
        """
        # catch single value
        if not isinstance(period, dict):
            period = dict(value=period)

        # try to parse specified period
        try:
            # ``.value`` is the nanosecond count formerly exposed as
            # ``.delta``, which pandas 2 removed.
            return pd.Timedelta(**period).value
        except Exception:
            cls.log().critical('unable to parse period: %s', str(period))
            # bare raise keeps the original traceback
            raise
项目:Eskapade    作者:KaveIO    | 项目源码 | 文件源码
def comp_date(day):
    """Map a 1-based integer day of year to its date in 1976."""
    import pandas as pd

    # day 1 is 1 January, so the offset is one less than the day number
    elapsed = pd.Timedelta('{:d}D'.format(day - 1))
    new_year = pd.Timestamp('1976-01-01')
    return new_year + elapsed
项目:Eskapade    作者:KaveIO    | 项目源码 | 文件源码
def comp_date(day):
    """Map a 1-based integer day of year to its date in 1976."""
    import pandas as pd

    # day 1 is 1 January, so the offset is one less than the day number
    elapsed = pd.Timedelta('{:d}D'.format(day - 1))
    new_year = pd.Timestamp('1976-01-01')
    return new_year + elapsed
项目:tianchi_power    作者:lvniqi    | 项目源码 | 文件源码
def get_holiday_df(day):
    """Load the table at HOLIDAY_PATH, re-indexed and shifted for ``day``.

    The index is rebuilt from the first 10 characters of ``init_date``
    (``YYYY/MM/DD``) and moved forward by ``30 + (day - 1)`` days. The
    columns are renamed ``festday#k`` for k from ``-30 - (day - 1)``
    up to (excluding) ``31 - (day - 1) + 5``.
    NOTE(review): this function emits the ``festday#`` prefix while
    get_festday_df emits ``holiday#`` -- confirm that is intended.
    """
    import datetime

    def _to_datetime(raw):
        return datetime.datetime.strptime(raw[:10], '%Y/%m/%d')

    back = day - 1
    frame = pd.DataFrame.from_csv(HOLIDAY_PATH)
    frame = frame.set_index(frame.init_date.apply(_to_datetime))
    frame.index = frame.index + pd.Timedelta('%dD'%(30+back))
    frame.columns = ['festday#%d'%k for k in range(-30-back,31-back+5)]
    return frame
项目:tianchi_power    作者:lvniqi    | 项目源码 | 文件源码
def get_festday_df(day):
    """Load the table at FEST_PATH, re-indexed and shifted for ``day``.

    The index is rebuilt from the first 10 characters of ``init_date``
    (``YYYY/MM/DD``) and moved forward by ``30 + (day - 1)`` days. The
    columns are renamed ``holiday#k`` for k from ``-30 - (day - 1)``
    up to (excluding) ``31 - (day - 1) + 5``.
    NOTE(review): this function emits the ``holiday#`` prefix while
    get_holiday_df emits ``festday#`` -- confirm that is intended.
    """
    import datetime

    def _to_datetime(raw):
        return datetime.datetime.strptime(raw[:10], '%Y/%m/%d')

    back = day - 1
    frame = pd.DataFrame.from_csv(FEST_PATH)
    frame = frame.set_index(frame.init_date.apply(_to_datetime))
    frame.index = frame.index + pd.Timedelta('%dD'%(30+back))
    frame.columns = ['holiday#%d'%k for k in range(-30-back,31-back+5)]
    return frame
项目:tianchi_power    作者:lvniqi    | 项目源码 | 文件源码
def get_prophet_df(user_id):
    """Load one user's prophet output and expand it into day-offset columns.

    For every selected column ``c`` and every ``k`` in -3..33 the result
    column ``'c#k'`` holds the value of ``c`` from k days later. The
    result is indexed by the source index without its first 31 and last
    3 entries, and rows containing NaN are dropped.
    """
    source = pd.DataFrame.from_csv(PROPHET_PATH+'%d.csv'%user_id)
    source.index = pd.to_datetime(source.ds)
    source = source[get_prophet_columns()]
    #predict 31 days
    new_df = pd.DataFrame(index = source.index[31:-3])
    for col in source.columns:
        base = source[col]
        # The index starts 3 days ahead and moves back one day per
        # offset, i.e. by (3 - step) days at the step-th offset.
        for step, day in enumerate(range(-3,31+3)):
            moved = base.copy()
            moved.index = base.index + pd.Timedelta(days=3-step)
            new_df[col+'#%d'%day] = moved
    return new_df.dropna()
项目:tianchi_power    作者:lvniqi    | 项目源码 | 文件源码
def get_weather_df():
    """Load the weather table and expand it into day-offset columns.

    For every selected column ``c`` the result gets columns ``'c#k'``
    for k in -30..33. The first of them is the source series moved 3
    days forward, and each following one is moved one more day back.
    The result is indexed by the source index without its first 30 and
    last 91 entries, and rows containing NaN are dropped.
    """
    source = pd.DataFrame.from_csv(WEATHER_PATH)
    source = source[get_weather_columns()]
    #predict 30 days
    new_df = pd.DataFrame(index = source.index[30:-88-3])
    for col in source.columns:
        base = source[col]
        # net shift at the step-th offset is (3 - step) days
        for step, day in enumerate(range(-30,31+3)):
            moved = base.copy()
            moved.index = base.index + pd.Timedelta(days=3-step)
            new_df[col+'#%d'%day] = moved
    return new_df.dropna()
项目:htsprophet    作者:CollinRooney12    | 项目源码 | 文件源码
def plotWeekly(dictframe, ax, uncertainty, weeklyStart, color='#0072B2'):

    if ax is None:
        figW = plt.figure(facecolor='w', figsize=(10, 6))
        ax = figW.add_subplot(111)
        figW = ax.get_figure()
    # Create a list of 7 days for the x axis of the plot
    days = (pd.date_range(start='2017-01-01', periods=7) +
    # Find the weekday seasonality values for each weekday
    weekdays = dictframe.ds.dt.weekday
    ind = []
    for weekday in range(7):
        ind.append(max(weekdays[weekdays == weekday].index.tolist()))
    # Plot only one weekday each
    ax.plot(range(len(days)), dictframe['weekly'][ind], ls='-', c=color)
    # Plot uncertainty if necessary
    if uncertainty:
        ax.fill_between(range(len(days)),dictframe['weekly_lower'][ind], dictframe['weekly_upper'][ind],color=color, alpha=0.2)
    ax.grid(True, which='major', c='gray', ls='-', lw=1, alpha=0.2)
    ax.set_xlabel('Day of week')
    return figW
项目:eemeter    作者:openeemeter    | 项目源码 | 文件源码
def _get_min_acceptable_period(self):
        return pd.Timedelta('1 days')
项目:eemeter    作者:openeemeter    | 项目源码 | 文件源码
def _get_min_acceptable_period(self):
        return pd.Timedelta('1 hours')
项目:pandas_market_calendars    作者:rsheftel    | 项目源码 | 文件源码
def date_range(schedule, frequency, closed='right', force_close=True, **kwargs):
    """
    Given a schedule will return a DatetimeIndex with all of the valid datetimes at the frequency given.
    The schedule values are assumed to be in UTC.

    :param schedule: schedule DataFrame
    :param frequency: frequency in standard string
    :param closed: same meaning as pandas date_range. 'right' will exclude the first value and should be used when the
      results should only include the close for each bar.
    :param force_close: if True then the close of the day will be included even if it does not fall on an even
      frequency. If False then the market close for the day may not be included in the results
    :param kwargs: arguments that will be passed to the pandas date_range
    :return: DatetimeIndex
    :raises ValueError: if ``frequency`` is longer than one day
    """

    if pd.Timedelta(frequency) > pd.Timedelta('1D'):
        raise ValueError('Frequency must be 1D or higher frequency.')
    # NOTE(review): pandas renamed date_range's ``closed`` keyword to
    # ``inclusive`` and later dropped ``closed`` -- confirm the pandas
    # version this is meant to run against.
    kwargs['closed'] = closed
    ranges = list()
    for row in schedule.itertuples():
        dates = pd.date_range(row.market_open, row.market_close, freq=frequency, tz='UTC', **kwargs)
        if force_close:
            if row.market_close not in dates:
                dates = dates.insert(len(dates), row.market_close)
        # Keep each day's range; without this the result was always empty.
        ranges.append(dates)

    # Sequential pairwise union; equivalent to the removed
    # DatetimeIndex.union_many.
    index = pd.DatetimeIndex([], tz='UTC')
    for dates in ranges:
        index = index.union(dates)
    return index
项目:pandas_market_calendars    作者:rsheftel    | 项目源码 | 文件源码
def days_at_time(days, t, tz, day_offset=0):
    """
    Create an index of days at time ``t``, interpreted in timezone ``tz``. The returned index is localized to UTC.

    In the example below, the times switch from 13:45 to 12:45 UTC because
    March 13th is the daylight savings transition for US/Eastern.  All the
    times are still 8:45 when interpreted in US/Eastern.

    >>> import pandas as pd; import datetime; import pprint
    >>> dts = pd.date_range('2016-03-12', '2016-03-14')
    >>> dts_at_845 = days_at_time(dts, datetime.time(8, 45), 'US/Eastern')
    >>> pprint.pprint([str(dt) for dt in dts_at_845])
    ['2016-03-12 13:45:00+00:00',
     '2016-03-13 12:45:00+00:00',
     '2016-03-14 12:45:00+00:00']

    :param days: DatetimeIndex An index of dates (represented as midnight).
    :param t: datetime.time The time to apply as an offset to each day in ``days``.
    :param tz: pytz.timezone The timezone to use to interpret ``t``.
    :param day_offset: int The number of days we want to offset @days by
    :return: DatetimeIndex of date with the time t
    """
    if len(days) == 0:
        return pd.DatetimeIndex(days).tz_localize(tz).tz_convert('UTC')

    # Offset days without tz to avoid timezone issues.
    days = pd.DatetimeIndex(days).tz_localize(None)
    # Wall-clock offset from midnight, plus the whole-day shift.
    delta = pd.Timedelta(
        days=day_offset,
        hours=t.hour,
        minutes=t.minute,
        seconds=t.second,
    )
    return (days + delta).tz_localize(tz).tz_convert('UTC')