The following 50 code examples, extracted from open-source Python projects, illustrate how to use pandas.to_timedelta().
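Before the extracted examples, a minimal sketch (not taken from any of the projects below) of the call patterns that recur throughout them:

import numpy as np
import pandas as pd

# A single string parses to a scalar Timedelta.
pd.to_timedelta('1 days 06:05:01.000030')

# Numeric input needs an explicit unit; a scalar yields a Timedelta ...
pd.to_timedelta(10, unit='s')

# ... while list-like input yields a TimedeltaIndex.
pd.to_timedelta(np.arange(5), unit='D')

# Unparseable entries raise ValueError by default;
# errors='coerce' turns them into NaT instead.
pd.to_timedelta(['1 day', 'bar', '1 min'], errors='coerce')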
def _wrap_result(self, result, block=None, obj=None):
    """ wrap a single result """

    if obj is None:
        obj = self._selected_obj
    if isinstance(result, np.ndarray):

        # coerce if necessary
        if block is not None:
            if com.is_timedelta64_dtype(block.values.dtype):
                result = pd.to_timedelta(
                    result.ravel(), unit='ns').values.reshape(result.shape)

        if result.ndim == 1:
            from pandas import Series
            return Series(result, obj.index, name=obj.name)

        return type(obj)(result, index=obj.index, columns=block.columns)
    return result
def test_to_timedelta_invalid(self):
    # these will error
    self.assertRaises(ValueError, lambda: to_timedelta([1, 2], unit='foo'))
    self.assertRaises(ValueError, lambda: to_timedelta(1, unit='foo'))

    # time not supported ATM
    self.assertRaises(ValueError, lambda: to_timedelta(time(second=1)))
    self.assertTrue(to_timedelta(
        time(second=1), errors='coerce') is pd.NaT)

    self.assertRaises(ValueError, lambda: to_timedelta(['foo', 'bar']))
    tm.assert_index_equal(TimedeltaIndex([pd.NaT, pd.NaT]),
                          to_timedelta(['foo', 'bar'], errors='coerce'))

    tm.assert_index_equal(TimedeltaIndex(['1 day', pd.NaT, '1 min']),
                          to_timedelta(['1 day', 'bar', '1 min'],
                                       errors='coerce'))
def test_to_timedelta_on_missing_values(self):
    # GH5438
    timedelta_NaT = np.timedelta64('NaT')

    actual = pd.to_timedelta(Series(['00:00:01', np.nan]))
    expected = Series([np.timedelta64(1000000000, 'ns'),
                       timedelta_NaT], dtype='<m8[ns]')
    assert_series_equal(actual, expected)

    actual = pd.to_timedelta(Series(['00:00:01', pd.NaT]))
    assert_series_equal(actual, expected)

    actual = pd.to_timedelta(np.nan)
    self.assertEqual(actual.value, timedelta_NaT.astype('int64'))

    actual = pd.to_timedelta(pd.NaT)
    self.assertEqual(actual.value, timedelta_NaT.astype('int64'))
def test_apply_to_timedelta(self):
    timedelta_NaT = pd.to_timedelta('NaT')

    list_of_valid_strings = ['00:00:01', '00:00:02']
    a = pd.to_timedelta(list_of_valid_strings)
    b = Series(list_of_valid_strings).apply(pd.to_timedelta)
    # Can't compare until apply on a Series gives the correct dtype
    # assert_series_equal(a, b)

    list_of_strings = ['00:00:01', np.nan, pd.NaT, timedelta_NaT]

    # TODO: unused?
    a = pd.to_timedelta(list_of_strings)  # noqa
    b = Series(list_of_strings).apply(pd.to_timedelta)  # noqa
    # Can't compare until apply on a Series gives the correct dtype
    # assert_series_equal(a, b)
def test_get_indexer(self):
    idx = pd.date_range('2000-01-01', periods=3)
    tm.assert_numpy_array_equal(idx.get_indexer(idx), [0, 1, 2])

    target = idx[0] + pd.to_timedelta(['-1 hour', '12 hours',
                                       '1 day 1 hour'])
    tm.assert_numpy_array_equal(idx.get_indexer(target, 'pad'), [-1, 0, 1])
    tm.assert_numpy_array_equal(
        idx.get_indexer(target, 'backfill'), [0, 1, 2])
    tm.assert_numpy_array_equal(
        idx.get_indexer(target, 'nearest'), [0, 1, 1])
    tm.assert_numpy_array_equal(
        idx.get_indexer(target, 'nearest',
                        tolerance=pd.Timedelta('1 hour')),
        [0, -1, 1])
    with tm.assertRaises(ValueError):
        idx.get_indexer(idx[[0]], method='nearest', tolerance='foo')
def test_get_loc(self):
    idx = pd.to_timedelta(['0 days', '1 days', '2 days'])

    for method in [None, 'pad', 'backfill', 'nearest']:
        self.assertEqual(idx.get_loc(idx[1], method), 1)
        self.assertEqual(idx.get_loc(idx[1].to_pytimedelta(), method), 1)
        self.assertEqual(idx.get_loc(str(idx[1]), method), 1)

    self.assertEqual(
        idx.get_loc(idx[1], 'pad', tolerance=pd.Timedelta(0)), 1)
    self.assertEqual(
        idx.get_loc(idx[1], 'pad', tolerance=np.timedelta64(0, 's')), 1)
    self.assertEqual(idx.get_loc(idx[1], 'pad', tolerance=timedelta(0)), 1)

    with tm.assertRaisesRegexp(ValueError, 'must be convertible'):
        idx.get_loc(idx[1], method='nearest', tolerance='foo')

    for method, loc in [('pad', 1), ('backfill', 2), ('nearest', 1)]:
        self.assertEqual(idx.get_loc('1 day 1 hour', method), loc)
def test_cummax_timedelta64(self):
    s = pd.Series(pd.to_timedelta(
        ['NaT', '2 min', 'NaT', '1 min', 'NaT', '3 min', ]))

    expected = pd.Series(pd.to_timedelta(
        ['NaT', '2 min', 'NaT', '2 min', 'NaT', '3 min', ]))
    result = s.cummax(skipna=True)
    self.assert_series_equal(expected, result)

    expected = pd.Series(pd.to_timedelta(
        ['NaT', '2 min', '2 min', '2 min', '2 min', '3 min', ]))
    result = s.cummax(skipna=False)
    self.assert_series_equal(expected, result)
def test_constructor_dict_timedelta_index(self):
    # GH #12169 : Resample category data with timedelta index
    # construct Series from dict as data and TimedeltaIndex as index
    # will result NaN in result Series data
    expected = Series(
        data=['A', 'B', 'C'],
        index=pd.to_timedelta([0, 10, 20], unit='s')
    )

    result = Series(
        data={pd.to_timedelta(0, unit='s'): 'A',
              pd.to_timedelta(10, unit='s'): 'B',
              pd.to_timedelta(20, unit='s'): 'C'},
        index=pd.to_timedelta([0, 10, 20], unit='s')
    )
    # this should work
    assert_series_equal(result, expected)
def test_timedelta64_dtype_array_returned(self):
    # GH 9431
    expected = np.array([31200, 45678, 10000], dtype='m8[ns]')

    td_index = pd.to_timedelta([31200, 45678, 31200, 10000, 45678])
    result = algos.unique(td_index)
    tm.assert_numpy_array_equal(result, expected)
    self.assertEqual(result.dtype, expected.dtype)

    s = pd.Series(td_index)
    result = algos.unique(s)
    tm.assert_numpy_array_equal(result, expected)
    self.assertEqual(result.dtype, expected.dtype)

    arr = s.values
    result = algos.unique(arr)
    tm.assert_numpy_array_equal(result, expected)
    self.assertEqual(result.dtype, expected.dtype)
def make_fea_set(sku_fea, user_fea, train_start_date, train_end_time, action_data):
    start_days = "2016-02-01"
    # generate sliding-window action features and merge them together
    actions = None
    for i in (1, 2, 3, 5, 7, 10, 15, 21, 30):
        start_time = train_end_time - pd.to_timedelta(str(i) + ' days')
        if actions is None:
            actions = get_action_feat(start_time, train_end_time, action_data)
        else:
            actions = pd.merge(actions,
                               get_action_feat(start_time, train_end_time, action_data),
                               how='left', on=['user_id', 'sku_id'])

    actions = pd.merge(actions, user_fea, how='left', on='user_id')
    actions = pd.merge(actions, sku_fea, how='left', on='sku_id')
    actions = actions.fillna(0)
    print('fea_weidu3', actions.shape)
    # actions.to_csv('test' + str(train_end_time).split(' ')[0] + '.csv')
    return actions
def output(self):
    '''
    Generate data wrapper for Mahali temperatures

    @return Mahali temperature data wrapper
    '''

    # Function to extract date from filename
    # (only month/day/year, no hours/minutes/seconds)
    def toDateTime(in_filename):
        return pd.to_datetime(pd.to_datetime(in_filename[7:25]).strftime('%Y-%m-%d'))

    # Read in file list:
    mahali_temperature_info = resource_filename('skdaccess',
                                                os.path.join('support', 'mahali_temperature_info.txt'))
    filenames = pd.read_csv(mahali_temperature_info, header=None,
                            names=('station', 'filename'),
                            skipinitialspace=True)

    # Create a column of dates
    filenames['date'] = filenames['filename'].apply(toDateTime)

    # Need to grab the day before as data can spill over
    adjusted_start_date = self.start_date - pd.to_timedelta('1d')
    adjusted_end_date = self.end_date + pd.to_timedelta('1d')

    station_list = self.ap_paramList[0]()

    # Get data for each selected station, from one day before until one
    # day after the requested date
    index_to_retrieve = np.logical_and.reduce([
        filenames.loc[:, 'station'].apply(lambda x: x in station_list),
        filenames.loc[:, 'date'] >= adjusted_start_date,
        filenames.loc[:, 'date'] <= self.end_date])

    all_temperature_data = self.retrieveOnlineData(filenames[index_to_retrieve])

    # Due to data spillover, cut each data frame in dictionary
    for station in all_temperature_data.keys():
        all_temperature_data[station] = \
            all_temperature_data[station].loc[adjusted_start_date:adjusted_end_date]

    # Return table wrapper of data
    return TableWrapper(all_temperature_data, default_columns=['Temperature'])
def output(self):
    '''
    Generate data wrapper for USGS geomagnetic data

    @return geomagnetic data wrapper
    '''
    observatory_list = self.ap_paramList[0]()

    # USGS Edge server
    base_url = 'cwbpub.cr.usgs.gov'
    factory = EdgeFactory(host=base_url, port=2060)

    data_dict = OrderedDict()
    for observatory in observatory_list:
        ret_data = factory.get_timeseries(observatory=observatory,
                                          interval=self.interval,
                                          type=self.data_type,
                                          channels=self.channels,
                                          starttime=UTCDateTime(self.start_time),
                                          endtime=UTCDateTime(self.end_time))

        obs_data = OrderedDict()
        for label, trace in zip(self.channels, ret_data):
            time = pd.to_datetime(trace.stats['starttime'].datetime) + \
                   pd.to_timedelta(trace.times(), unit='s')
            obs_data[label] = pd.Series(trace.data, time)

        data_dict[observatory] = pd.DataFrame(obs_data)

    return TableWrapper(data_dict, default_columns=self.channels)
def dateMismatch(dates, days=10):
    '''
    Check if dates are not within a certain number of days of each other

    @param dates: Iterable container of pandas timestamps
    @param days: Number of days

    @return True if any pair of dates is more than the given number of
            days apart, False otherwise
    '''
    for combo in combinations(dates, 2):
        if np.abs(combo[0] - combo[1]) > pd.to_timedelta(days, 'D'):
            return True
    return False
def convert_gps_time(gpsweek, gpsweekseconds, format='unix'):
    """
    convert_gps_time :: (String -> String) -> Float

    Converts a GPS time format (weeks + seconds since 6 Jan 1980) to a UNIX
    timestamp (seconds since 1 Jan 1970) without correcting for UTC leap
    seconds.

    Static values gps_delta and gpsweek_cf are defined by the below
    functions (optimization):

    gps_delta is the time difference (in seconds) between UNIX time and GPS time.
    gps_delta = (dt.datetime(1980, 1, 6) - dt.datetime(1970, 1, 1)).total_seconds()

    gpsweek_cf is the coefficient to convert weeks to seconds
    gpsweek_cf = 7 * 24 * 60 * 60  # 604800

    :param gpsweek: Number of weeks since beginning of GPS time
        (1980-01-06 00:00:00)
    :param gpsweekseconds: Number of seconds since the GPS week parameter
    :return: (float) unix timestamp (number of seconds since
        1970-01-01 00:00:00)
    """
    # GPS time begins 1980 Jan 6 00:00, UNIX time begins 1970 Jan 1 00:00
    gps_delta = 315964800.0
    gpsweek_cf = 604800

    if isinstance(gpsweek, pd.Series) and isinstance(gpsweekseconds, pd.Series):
        gps_ticks = (gpsweek.astype('float64') * gpsweek_cf) + gpsweekseconds.astype('float64')
    else:
        gps_ticks = (float(gpsweek) * gpsweek_cf) + float(gpsweekseconds)

    timestamp = gps_delta + gps_ticks

    if format == 'unix':
        return timestamp
    elif format == 'datetime':
        return datetime.datetime(1970, 1, 1) + pd.to_timedelta(timestamp, unit='s')
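A quick sanity check of the converter above (GPS week 0, second 0 is the GPS epoch itself; the week-2000 call is just an arbitrary illustration):

import datetime
import pandas as pd

print(convert_gps_time(0, 0))                     # 315964800.0
print(convert_gps_time(0, 0, format='datetime'))  # 1980-01-06 00:00:00
# GPS week 2000 began on Sunday 2018-05-06
print(convert_gps_time(2000, 0, format='datetime'))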
def datenum_to_datetime(timestamp):
    if isinstance(timestamp, pd.Series):
        return (timestamp.astype(int).map(datetime.datetime.fromordinal) +
                pd.to_timedelta(timestamp % 1, unit='D') -
                pd.to_timedelta('366 days'))
    else:
        return (datetime.datetime.fromordinal(int(timestamp) - 366) +
                datetime.timedelta(days=timestamp % 1))
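The 366-day correction exists because MATLAB datenums count days from a proleptic year 0, putting them 366 days ahead of Python's date ordinals (which start at 0001-01-01). A hypothetical round-trip check, where 738000.5 is an arbitrary datenum (28-Jul-2020 12:00 in MATLAB terms):

import datetime
import pandas as pd

print(datenum_to_datetime(738000.5))               # 2020-07-28 12:00:00
print(datenum_to_datetime(pd.Series([738000.5])))  # same value, Series path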
def _convert_ulysses_time(data):
    """Method to convert timestamps to datetimes"""
    data.loc[data['year'] > 50, 'year'] += 1900
    data.loc[data['year'] < 50, 'year'] += 2000

    data['Time'] = pd.to_datetime(data['year'].astype(str) + ':' +
                                  data['doy'].astype(str),
                                  format='%Y:%j')
    data['Time'] += (pd.to_timedelta(data['hour'], unit='h') +
                     pd.to_timedelta(data['minute'], unit='m') +
                     pd.to_timedelta(data['second'], unit='s'))

    data = data.drop(['year', 'doy', 'hour', 'minute', 'second'], axis=1)
    return data
def convertColumnsToTimeDeltas(self, section):
    col = self._df.columns[section]
    self._df[col] = pd.to_timedelta(self._df[col])
    self._dirty = True
    self.dataChanged.emit()

###############################################################################
def test_resample_with_timedeltas(self):
    expected = DataFrame({'A': np.arange(1480)})
    expected = expected.groupby(expected.index // 30).sum()
    expected.index = pd.timedelta_range('0 days', freq='30T', periods=50)

    df = DataFrame({'A': np.arange(1480)},
                   index=pd.to_timedelta(np.arange(1480), unit='T'))
    result = df.resample('30T').sum()

    assert_frame_equal(result, expected)

    s = df['A']
    result = s.resample('30T').sum()
    assert_series_equal(result, expected['A'])
def test_ops_ndarray(self):
    td = Timedelta('1 day')

    # timedelta, timedelta
    other = pd.to_timedelta(['1 day']).values
    expected = pd.to_timedelta(['2 days']).values
    self.assert_numpy_array_equal(td + other, expected)
    if LooseVersion(np.__version__) >= '1.8':
        self.assert_numpy_array_equal(other + td, expected)
    self.assertRaises(TypeError, lambda: td + np.array([1]))
    self.assertRaises(TypeError, lambda: np.array([1]) + td)

    expected = pd.to_timedelta(['0 days']).values
    self.assert_numpy_array_equal(td - other, expected)
    if LooseVersion(np.__version__) >= '1.8':
        self.assert_numpy_array_equal(-other + td, expected)
    self.assertRaises(TypeError, lambda: td - np.array([1]))
    self.assertRaises(TypeError, lambda: np.array([1]) - td)

    expected = pd.to_timedelta(['2 days']).values
    self.assert_numpy_array_equal(td * np.array([2]), expected)
    self.assert_numpy_array_equal(np.array([2]) * td, expected)
    self.assertRaises(TypeError, lambda: td * other)
    self.assertRaises(TypeError, lambda: other * td)

    self.assert_numpy_array_equal(td / other, np.array([1]))
    if LooseVersion(np.__version__) >= '1.8':
        self.assert_numpy_array_equal(other / td, np.array([1]))

    # timedelta, datetime
    other = pd.to_datetime(['2000-01-01']).values
    expected = pd.to_datetime(['2000-01-02']).values
    self.assert_numpy_array_equal(td + other, expected)
    if LooseVersion(np.__version__) >= '1.8':
        self.assert_numpy_array_equal(other + td, expected)

    expected = pd.to_datetime(['1999-12-31']).values
    self.assert_numpy_array_equal(-td + other, expected)
    if LooseVersion(np.__version__) >= '1.8':
        self.assert_numpy_array_equal(other - td, expected)
def test_ops_series(self):
    # regression test for GH8813
    td = Timedelta('1 day')
    other = pd.Series([1, 2])
    expected = pd.Series(pd.to_timedelta(['1 day', '2 days']))
    tm.assert_series_equal(expected, td * other)
    tm.assert_series_equal(expected, other * td)
def test_timedelta_range(self):
    expected = to_timedelta(np.arange(5), unit='D')
    result = timedelta_range('0 days', periods=5, freq='D')
    tm.assert_index_equal(result, expected)

    expected = to_timedelta(np.arange(11), unit='D')
    result = timedelta_range('0 days', '10 days', freq='D')
    tm.assert_index_equal(result, expected)

    expected = to_timedelta(np.arange(5), unit='D') + Second(2) + Day()
    result = timedelta_range('1 days, 00:00:02', '5 days, 00:00:02',
                             freq='D')
    tm.assert_index_equal(result, expected)

    expected = to_timedelta([1, 3, 5, 7, 9], unit='D') + Second(2)
    result = timedelta_range('1 days, 00:00:02', periods=5, freq='2D')
    tm.assert_index_equal(result, expected)

    expected = to_timedelta(np.arange(50), unit='T') * 30
    result = timedelta_range('0 days', freq='30T', periods=50)
    tm.assert_index_equal(result, expected)

    # GH 11776
    arr = np.arange(10).reshape(2, 5)
    df = pd.DataFrame(np.arange(10).reshape(2, 5))
    for arg in (arr, df):
        with tm.assertRaisesRegexp(TypeError, "1-d array"):
            to_timedelta(arg)
        for errors in ['ignore', 'raise', 'coerce']:
            with tm.assertRaisesRegexp(TypeError, "1-d array"):
                to_timedelta(arg, errors=errors)

    # issue10583
    df = pd.DataFrame(np.random.normal(size=(10, 4)))
    df.index = pd.timedelta_range(start='0s', periods=10, freq='s')
    expected = df.loc[pd.Timedelta('0s'):, :]
    result = df.loc['0s':, :]
    assert_frame_equal(expected, result)
def test_nat_converters(self):
    self.assertEqual(to_timedelta(
        'nat', box=False).astype('int64'), tslib.iNaT)
    self.assertEqual(to_timedelta(
        'nan', box=False).astype('int64'), tslib.iNaT)
def test_timedelta_ops_scalar(self):
    # GH 6808
    base = pd.to_datetime('20130101 09:01:12.123456')
    expected_add = pd.to_datetime('20130101 09:01:22.123456')
    expected_sub = pd.to_datetime('20130101 09:01:02.123456')

    for offset in [pd.to_timedelta(10, unit='s'),
                   timedelta(seconds=10),
                   np.timedelta64(10, 's'),
                   np.timedelta64(10000000000, 'ns'),
                   pd.offsets.Second(10)]:
        result = base + offset
        self.assertEqual(result, expected_add)

        result = base - offset
        self.assertEqual(result, expected_sub)

    base = pd.to_datetime('20130102 09:01:12.123456')
    expected_add = pd.to_datetime('20130103 09:01:22.123456')
    expected_sub = pd.to_datetime('20130101 09:01:02.123456')

    for offset in [pd.to_timedelta('1 day, 00:00:10'),
                   pd.to_timedelta('1 days, 00:00:10'),
                   timedelta(days=1, seconds=10),
                   np.timedelta64(1, 'D') + np.timedelta64(10, 's'),
                   pd.offsets.Day() + pd.offsets.Second(10)]:
        result = base + offset
        self.assertEqual(result, expected_add)

        result = base - offset
        self.assertEqual(result, expected_sub)
def test_constructor_coverage(self):
    rng = timedelta_range('1 days', periods=10.5)
    exp = timedelta_range('1 days', periods=10)
    self.assertTrue(rng.equals(exp))

    self.assertRaises(ValueError, TimedeltaIndex, start='1 days',
                      periods='foo', freq='D')

    self.assertRaises(ValueError, TimedeltaIndex, start='1 days',
                      end='10 days')

    self.assertRaises(ValueError, TimedeltaIndex, '1 days')

    # generator expression
    gen = (timedelta(i) for i in range(10))
    result = TimedeltaIndex(gen)
    expected = TimedeltaIndex([timedelta(i) for i in range(10)])
    self.assertTrue(result.equals(expected))

    # NumPy string array
    strings = np.array(['1 days', '2 days', '3 days'])
    result = TimedeltaIndex(strings)
    expected = to_timedelta([1, 2, 3], unit='d')
    self.assertTrue(result.equals(expected))

    from_ints = TimedeltaIndex(expected.asi8)
    self.assertTrue(from_ints.equals(expected))

    # non-conforming freq
    self.assertRaises(ValueError, TimedeltaIndex,
                      ['1 days', '2 days', '4 days'], freq='D')

    self.assertRaises(ValueError, TimedeltaIndex, periods=10, freq='D')
def test_conversion_preserves_name(self):
    # GH 10875
    i = pd.Index(['01:02:03', '01:02:04'], name='label')
    self.assertEqual(i.name, pd.to_datetime(i).name)
    self.assertEqual(i.name, pd.to_timedelta(i).name)
def create_index(self):
    return pd.to_timedelta(range(5), unit='d') + pd.offsets.Hour(1)
def test_get_indexer(self):
    idx = pd.to_timedelta(['0 days', '1 days', '2 days'])
    tm.assert_numpy_array_equal(idx.get_indexer(idx), [0, 1, 2])

    target = pd.to_timedelta(['-1 hour', '12 hours', '1 day 1 hour'])
    tm.assert_numpy_array_equal(idx.get_indexer(target, 'pad'), [-1, 0, 1])
    tm.assert_numpy_array_equal(
        idx.get_indexer(target, 'backfill'), [0, 1, 2])
    tm.assert_numpy_array_equal(
        idx.get_indexer(target, 'nearest'), [0, 1, 1])
    tm.assert_numpy_array_equal(
        idx.get_indexer(target, 'nearest',
                        tolerance=pd.Timedelta('1 hour')),
        [0, -1, 1])
def test_quantile(self):
    from numpy import percentile

    q = self.ts.quantile(0.1)
    self.assertEqual(q, percentile(self.ts.valid(), 10))

    q = self.ts.quantile(0.9)
    self.assertEqual(q, percentile(self.ts.valid(), 90))

    # object dtype
    q = Series(self.ts, dtype=object).quantile(0.9)
    self.assertEqual(q, percentile(self.ts.valid(), 90))

    # datetime64[ns] dtype
    dts = self.ts.index.to_series()
    q = dts.quantile(.2)
    self.assertEqual(q, Timestamp('2000-01-10 19:12:00'))

    # timedelta64[ns] dtype
    tds = dts.diff()
    q = tds.quantile(.25)
    self.assertEqual(q, pd.to_timedelta('24:00:00'))

    # GH7661
    result = Series([np.timedelta64('NaT')]).sum()
    self.assertTrue(result is pd.NaT)

    msg = 'percentiles should all be in the interval \\[0, 1\\]'
    for invalid in [-1, 2, [0.5, -1], [0.5, 2]]:
        with tm.assertRaisesRegexp(ValueError, msg):
            self.ts.quantile(invalid)
def test_isin_with_i8(self):
    # GH 5021

    expected = Series([True, True, False, False, False])
    expected2 = Series([False, True, False, False, False])

    # datetime64[ns]
    s = Series(date_range('jan-01-2013', 'jan-05-2013'))

    result = s.isin(s[0:2])
    assert_series_equal(result, expected)

    result = s.isin(s[0:2].values)
    assert_series_equal(result, expected)

    # fails on dtype conversion in the first place
    result = s.isin(s[0:2].values.astype('datetime64[D]'))
    assert_series_equal(result, expected)

    result = s.isin([s[1]])
    assert_series_equal(result, expected2)

    result = s.isin([np.datetime64(s[1])])
    assert_series_equal(result, expected2)

    # timedelta64[ns]
    s = Series(pd.to_timedelta(lrange(5), unit='d'))
    result = s.isin(s[0:2])
    assert_series_equal(result, expected)
def test_timedelta64_operations_with_timedeltas(self):
    # td operate with td
    td1 = Series([timedelta(minutes=5, seconds=3)] * 3)
    td2 = timedelta(minutes=5, seconds=4)
    result = td1 - td2
    expected = (Series([timedelta(seconds=0)] * 3) -
                Series([timedelta(seconds=1)] * 3))
    self.assertEqual(result.dtype, 'm8[ns]')
    assert_series_equal(result, expected)

    result2 = td2 - td1
    expected = (Series([timedelta(seconds=1)] * 3) -
                Series([timedelta(seconds=0)] * 3))
    assert_series_equal(result2, expected)

    # roundtrip
    assert_series_equal(result + td2, td1)

    # Now again, using pd.to_timedelta, which should build
    # a Series or a scalar, depending on input.
    td1 = Series(pd.to_timedelta(['00:05:03'] * 3))
    td2 = pd.to_timedelta('00:05:04')
    result = td1 - td2
    expected = (Series([timedelta(seconds=0)] * 3) -
                Series([timedelta(seconds=1)] * 3))
    self.assertEqual(result.dtype, 'm8[ns]')
    assert_series_equal(result, expected)

    result2 = td2 - td1
    expected = (Series([timedelta(seconds=1)] * 3) -
                Series([timedelta(seconds=0)] * 3))
    assert_series_equal(result2, expected)

    # roundtrip
    assert_series_equal(result + td2, td1)
def test_even_day(self):
    delta_1d = pd.to_timedelta(1, unit='D')
    delta_0d = pd.to_timedelta(0, unit='D')
    delta_1s = pd.to_timedelta(1, unit='s')
    delta_500ms = pd.to_timedelta(500, unit='ms')

    drepr = lambda x: x._repr_base(format='even_day')
    self.assertEqual(drepr(delta_1d), "1 days")
    self.assertEqual(drepr(-delta_1d), "-1 days")
    self.assertEqual(drepr(delta_0d), "0 days")
    self.assertEqual(drepr(delta_1s), "0 days 00:00:01")
    self.assertEqual(drepr(delta_500ms), "0 days 00:00:00.500000")
    self.assertEqual(drepr(delta_1d + delta_1s), "1 days 00:00:01")
    self.assertEqual(
        drepr(delta_1d + delta_500ms), "1 days 00:00:00.500000")
def test_sub_day(self):
    delta_1d = pd.to_timedelta(1, unit='D')
    delta_0d = pd.to_timedelta(0, unit='D')
    delta_1s = pd.to_timedelta(1, unit='s')
    delta_500ms = pd.to_timedelta(500, unit='ms')

    drepr = lambda x: x._repr_base(format='sub_day')
    self.assertEqual(drepr(delta_1d), "1 days")
    self.assertEqual(drepr(-delta_1d), "-1 days")
    self.assertEqual(drepr(delta_0d), "00:00:00")
    self.assertEqual(drepr(delta_1s), "00:00:01")
    self.assertEqual(drepr(delta_500ms), "00:00:00.500000")
    self.assertEqual(drepr(delta_1d + delta_1s), "1 days 00:00:01")
    self.assertEqual(
        drepr(delta_1d + delta_500ms), "1 days 00:00:00.500000")
def test_long(self):
    delta_1d = pd.to_timedelta(1, unit='D')
    delta_0d = pd.to_timedelta(0, unit='D')
    delta_1s = pd.to_timedelta(1, unit='s')
    delta_500ms = pd.to_timedelta(500, unit='ms')

    drepr = lambda x: x._repr_base(format='long')
    self.assertEqual(drepr(delta_1d), "1 days 00:00:00")
    self.assertEqual(drepr(-delta_1d), "-1 days +00:00:00")
    self.assertEqual(drepr(delta_0d), "0 days 00:00:00")
    self.assertEqual(drepr(delta_1s), "0 days 00:00:01")
    self.assertEqual(drepr(delta_500ms), "0 days 00:00:00.500000")
    self.assertEqual(drepr(delta_1d + delta_1s), "1 days 00:00:01")
    self.assertEqual(
        drepr(delta_1d + delta_500ms), "1 days 00:00:00.500000")
def test_all(self):
    delta_1d = pd.to_timedelta(1, unit='D')
    delta_0d = pd.to_timedelta(0, unit='D')
    delta_1ns = pd.to_timedelta(1, unit='ns')

    drepr = lambda x: x._repr_base(format='all')
    self.assertEqual(drepr(delta_1d), "1 days 00:00:00.000000000")
    self.assertEqual(drepr(delta_0d), "0 days 00:00:00.000000000")
    self.assertEqual(drepr(delta_1ns), "0 days 00:00:00.000000001")
def test_days(self):
    x = pd.to_timedelta(list(range(5)) + [pd.NaT], unit='D')
    result = fmt.Timedelta64Formatter(x, box=True).get_result()
    self.assertEqual(result[0].strip(), "'0 days'")
    self.assertEqual(result[1].strip(), "'1 days'")

    result = fmt.Timedelta64Formatter(x[1:2], box=True).get_result()
    self.assertEqual(result[0].strip(), "'1 days'")

    result = fmt.Timedelta64Formatter(x, box=False).get_result()
    self.assertEqual(result[0].strip(), "0 days")
    self.assertEqual(result[1].strip(), "1 days")

    result = fmt.Timedelta64Formatter(x[1:2], box=False).get_result()
    self.assertEqual(result[0].strip(), "1 days")
def test_subdays(self):
    y = pd.to_timedelta(list(range(5)) + [pd.NaT], unit='s')
    result = fmt.Timedelta64Formatter(y, box=True).get_result()
    self.assertEqual(result[0].strip(), "'00:00:00'")
    self.assertEqual(result[1].strip(), "'00:00:01'")
def test_subdays_neg(self):
    y = pd.to_timedelta(list(range(5)) + [pd.NaT], unit='s')
    result = fmt.Timedelta64Formatter(-y, box=True).get_result()
    self.assertEqual(result[0].strip(), "'00:00:00'")
    self.assertEqual(result[1].strip(), "'-1 days +23:59:59'")
def test_zero(self):
    x = pd.to_timedelta(list(range(1)) + [pd.NaT], unit='D')
    result = fmt.Timedelta64Formatter(x, box=True).get_result()
    self.assertEqual(result[0].strip(), "'0 days'")

    x = pd.to_timedelta(list(range(1)), unit='D')
    result = fmt.Timedelta64Formatter(x, box=True).get_result()
    self.assertEqual(result[0].strip(), "'0 days'")
def test_describe_timedelta(self):
    df = DataFrame({"td": pd.to_timedelta(np.arange(24) % 20, "D")})
    self.assertTrue(df.describe().loc["mean"][0] ==
                    pd.to_timedelta("8d4h"))
def _chunk_to_dataframe(self):

    n = self._current_row_in_chunk_index
    m = self._current_row_in_file_index
    ix = range(m - n, m)
    rslt = pd.DataFrame(index=ix)

    js, jb = 0, 0
    for j in range(self.column_count):

        name = self.column_names[j]

        if self.column_types[j] == b'd':
            rslt[name] = self._byte_chunk[jb, :].view(
                dtype=self.byte_order + 'd')
            rslt[name] = np.asarray(rslt[name], dtype=np.float64)
            if self.convert_dates and (self.column_formats[j] == "MMDDYY"):
                epoch = pd.datetime(1960, 1, 1)
                rslt[name] = epoch + pd.to_timedelta(rslt[name], unit='d')
            jb += 1
        elif self.column_types[j] == b's':
            rslt[name] = self._string_chunk[js, :]
            rslt[name] = rslt[name].apply(lambda x: x.rstrip(b'\x00 '))
            if self.encoding is not None:
                rslt[name] = rslt[name].apply(
                    lambda x: x.decode(encoding=self.encoding))
            if self.blank_missing:
                ii = rslt[name].str.len() == 0
                rslt.loc[ii, name] = np.nan
            js += 1
        else:
            raise ValueError("unknown column type %s" %
                             self.column_types[j])

    return rslt
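The MMDDYY branch above converts SAS numeric dates, stored as days since the SAS epoch of 1960-01-01, into timestamps. The same conversion in isolation (the sample values are arbitrary):

import pandas as pd

sas_days = pd.Series([0.0, 366.0, 21915.0])  # days since 1960-01-01
epoch = pd.Timestamp('1960-01-01')
print(epoch + pd.to_timedelta(sas_days, unit='d'))
# 1960-01-01, 1961-01-01 (1960 was a leap year), 2020-01-01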
def test_timedelta(self):

    # see #6921
    df = to_timedelta(
        Series(['00:00:01', '00:00:03'], name='foo')).to_frame()
    with tm.assert_produces_warning(UserWarning):
        df.to_sql('test_timedelta', self.conn)
    result = sql.read_sql_query('SELECT * FROM test_timedelta', self.conn)
    tm.assert_series_equal(result['foo'], df['foo'].astype('int64'))
def test_timedelta(self):
    converter = lambda x: pd.to_timedelta(x, unit='ms')

    s = Series([timedelta(23), timedelta(seconds=5)])
    self.assertEqual(s.dtype, 'timedelta64[ns]')

    # index will be float dtype
    assert_series_equal(s, pd.read_json(s.to_json(), typ='series')
                        .apply(converter),
                        check_index_type=False)

    s = Series([timedelta(23), timedelta(seconds=5)],
               index=pd.Index([0, 1], dtype=float))
    self.assertEqual(s.dtype, 'timedelta64[ns]')
    assert_series_equal(s, pd.read_json(
        s.to_json(), typ='series').apply(converter))

    frame = DataFrame([timedelta(23), timedelta(seconds=5)])
    self.assertEqual(frame[0].dtype, 'timedelta64[ns]')
    assert_frame_equal(frame, pd.read_json(frame.to_json())
                       .apply(converter),
                       check_index_type=False,
                       check_column_type=False)

    frame = DataFrame({'a': [timedelta(days=23), timedelta(seconds=5)],
                       'b': [1, 2],
                       'c': pd.date_range(start='20130101', periods=2)})

    result = pd.read_json(frame.to_json(date_unit='ns'))
    result['a'] = pd.to_timedelta(result.a, unit='ns')
    result['c'] = pd.to_datetime(result.c)
    assert_frame_equal(frame, result, check_index_type=False)
def test_concat_timedelta64_block(self):
    from pandas import to_timedelta

    rng = to_timedelta(np.arange(10), unit='s')
    df = DataFrame({'time': rng})

    result = concat([df, df])
    self.assertTrue((result.iloc[:10]['time'] == rng).all())
    self.assertTrue((result.iloc[10:]['time'] == rng).all())
def init():
    """Return top level command handler."""

    @click.command()
    @cli.handle_exceptions(restclient.CLI_REST_EXCEPTIONS)
    @click.option('--match', help='Server name pattern match')
    @click.option('--full', is_flag=True, default=False)
    @click.pass_context
    def apps(ctx, match, full):
        """View apps report."""
        report = fetch_report(ctx.obj.get('api'), 'apps', match)

        # Replace integer N/As
        for col in ['identity', 'expires', 'lease', 'data_retention']:
            report.loc[report[col] == -1, col] = ''

        # Convert to datetimes
        for col in ['expires']:
            report[col] = pd.to_datetime(report[col], unit='s')

        # Convert to timedeltas
        for col in ['lease', 'data_retention']:
            report[col] = pd.to_timedelta(report[col], unit='s')

        report = report.fillna('')
        if not full:
            report = report[[
                'instance', 'allocation', 'partition', 'server',
                'mem', 'cpu', 'disk'
            ]]
        print_report(report)

    return apps
def format_date_to_datetime(self, df, t_date=None):
    if t_date is None:
        t_date = dataTime.datetimeRelative(delta=0)
    t_date = t_date.replace(' 00:00:00', '')
    df_new = df.copy()
    df_new.insert(0, 'datetime', t_date)
    df_new['datetime'] = pd.to_datetime(df_new['datetime'])
    df_new['time'] = pd.to_timedelta(df_new['time'])
    df_new['datetime'] = df_new['datetime'] + df_new['time']
    df_new = df_new.sort_values(['datetime'], ascending=[True])
    del df_new['time']
    return df_new

# Fetch historical quote data
# Parameters:
# code: the 6-digit stock code, or an index code (sh=Shanghai composite,
#       sz=Shenzhen component, hs300=CSI 300, sz50=SSE 50, zxb=SME board,
#       cyb=ChiNext)
# start: start date, YYYY-MM-DD
# end: end date, YYYY-MM-DD
# ktype: bar frequency (D=daily W=weekly M=monthly 5=5min 15=15min
#        30=30min 60=60min), defaults to D
# retry_count: number of retries after a network error, defaults to 3
# pause: pause between retries in seconds, defaults to 0
# Returned columns:
# date: trade date
# open: open price
# high: high price
# close: close price
# low: low price
# volume: trade volume
# price_change: price change
# p_change: percent change
# ma5: 5-day moving average of price
# ma10: 10-day moving average of price
# ma20: 20-day moving average of price
# v_ma5: 5-day moving average of volume
# v_ma10: 10-day moving average of volume
# v_ma20: 20-day moving average of volume
# turnover: turnover rate (not available for index data)
def process(self, obj_data):
    '''
    Apply the MIDAS estimator to generate velocity estimates

    Adds the result to the data wrapper

    @param obj_data: Data wrapper
    '''
    if self.column_names is None:
        column_names = obj_data.getDefaultColumns()
    else:
        column_names = self.column_names

    time_diff = pd.to_timedelta('365d')

    results = dict()
    for label, data in obj_data.getIterator():
        start_date = data.index[0]
        end_date = data.index[-1]

        for column in column_names:
            start_data = data.loc[start_date:(end_date - time_diff), column]
            end_data = data.loc[start_date + time_diff:end_date, column]

            offsets = end_data.values - start_data.values
            offsets = offsets[~np.isnan(offsets)]

            med_off = np.median(offsets)
            mad_off = mad(offsets)

            cut_offsets = offsets[np.logical_and(offsets < med_off + 2 * mad_off,
                                                 offsets > med_off - 2 * mad_off)]

            final_vel = np.median(cut_offsets)
            final_unc = np.sqrt(np.pi / 2) * mad(cut_offsets) / np.sqrt(len(cut_offsets))

            results[label] = pd.DataFrame([final_vel, final_unc],
                                          ['velocity', 'uncertainty'],
                                          [column])

    obj_data.addResult(self.str_description,
                       pd.Panel.fromDict(results, orient='minor'))
def to_seconds(timedelta_str):
    return to_timedelta(timedelta_str).total_seconds()
def to_timedelta(timedelta_repr):
    return pd.to_timedelta(str(timedelta_repr), unit='s')
def test_timedelta_to_human(self):
    for td in timedelta(days=1, seconds=3900), pd.to_timedelta('1d1h5m'):
        self.assertEqual('1.05 days', timedelta_to_human(td, precision=2))
        self.assertEqual('1.0 day', timedelta_to_human(td, precision=1))
    for td in timedelta(days=-1, seconds=-3900), pd.to_timedelta('-1d1h5m'):
        self.assertEqual('1.05 days ago',
                         timedelta_to_human(td, precision=2))
        self.assertEqual('1.0 day ago',
                         timedelta_to_human(td, precision=1))
def get_accumulate_action_feat(start_time, end_time, action_data):
    # parse times first so that both the window filter and the decay
    # weights below operate on datetimes (the original converted after
    # slicing, leaving the sliced 'time' column as strings)
    action_data['time'] = pd.to_datetime(action_data['time'],
                                         format='%Y-%m-%d %H:%M:%S')
    actions = action_data[(action_data['time'] >= start_time)
                          & (action_data['time'] <= end_time)]

    df = pd.get_dummies(actions['type'], prefix='action')
    actions = pd.concat([actions, df], axis=1)  # type: pd.DataFrame

    # weight each action by how long before end_time it happened
    actions['weights'] = actions['time'].map(
        lambda x: pd.to_timedelta(end_time - x))
    # actions['weights'] = time.strptime(end_date, '%Y-%m-%d') - actions['datetime']
    actions['weights'] = actions['weights'].map(lambda x: math.exp(-x.days))
    print(actions.head(10))

    actions['action_1'] = actions['action_1'] * actions['weights']
    actions['action_2'] = actions['action_2'] * actions['weights']
    actions['action_3'] = actions['action_3'] * actions['weights']
    actions['action_4'] = actions['action_4'] * actions['weights']
    actions['action_5'] = actions['action_5'] * actions['weights']
    actions['action_6'] = actions['action_6'] * actions['weights']

    del actions['model_id']
    del actions['time']
    del actions['weights']
    del actions['cate']
    del actions['brand']

    actions = actions.groupby(['user_id', 'sku_id'], as_index=False).sum()
    actions.fillna(0, inplace=True)
    actions['action_1256'] = (actions['action_1'] + actions['action_2'] +
                              actions['action_5'] + actions['action_6'])
    actions['action_1256_d_4'] = actions['action_4'] / actions['action_1256']
    del actions['type']
    return actions
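The decay weighting inside get_accumulate_action_feat boils down to the following standalone sketch (the dates are arbitrary illustrations):

import math
import pandas as pd

end_time = pd.Timestamp('2016-04-15')
times = pd.to_datetime(pd.Series(['2016-04-14', '2016-04-10', '2016-03-15']))

# age of each action as a Timedelta, then an exponential decay on whole days
ages = times.map(lambda x: pd.to_timedelta(end_time - x))
weights = ages.map(lambda x: math.exp(-x.days))
print(weights)  # e**-1, e**-5, e**-31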