The following are 50 code examples, extracted from open source Python projects, showing how to use pandas.to_datetime().
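Before the project examples, here is a minimal sketch of the pd.to_datetime() call patterns that recur below (plain string parsing, explicit format strings, epoch seconds via unit='s', and whole-column conversion); the values and column names are illustrative only and are not taken from any of the projects.

import pandas as pd

# Parse an ISO-style date string (format inferred).
ts = pd.to_datetime('2017-10-14')

# Explicit format string, e.g. year plus day-of-year as used in several examples below.
doy = pd.to_datetime('2015123', format='%Y%j')

# Convert Unix epoch seconds, returning tz-aware UTC timestamps.
epochs = pd.to_datetime([0, 3600], unit='s', utc=True)

# Convert an entire DataFrame column element-wise.
df = pd.DataFrame({'date': ['2016-05-31', '2016-06-01']})
df['date'] = pd.to_datetime(df['date'])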
def y_sum_by_time(x_arr, y_arr, top=None):
    df = pd.DataFrame({'Timestamp': pd.to_datetime(x_arr, unit='s'), 'Status': y_arr})
    df['Date'] = df['Timestamp'].apply(lambda x: "%d/%d/%d" % (x.day, x.month, x.year))
    df['Hour'] = df['Timestamp'].apply(lambda x: "%d" % (x.hour))
    df['Weekday'] = df['Timestamp'].apply(lambda x: "%s" % (x.weekday_name))

    times = ['Hour', 'Weekday', 'Date']
    result = {}
    for groupby in times:
        df_group = df.groupby(groupby, as_index=False).agg({'Status': np.sum})
        if top != None and top > 0:
            #df_group = df_group.nlargest(top, 'Status').sort(['Status', 'Hour'], ascending=False)
            idx = df_group.nlargest(top, 'Status') > 0
        else:
            idx = df_group['Status'].max() == df_group['Status']

        result[groupby] = {k: g['Status'].replace(np.nan, 'None').tolist()
                           for k, g in df_group[idx].groupby(groupby)}
    return result
def output(self):
    '''
    Generate data wrapper for Mahali data

    @return Mahali data wrapper
    '''
    nav_files, obs_files = self.cacheData()

    def getSiteAndDate(in_filename):
        date = pd.to_datetime('2015' + in_filename[-8:-5], format='%Y%j')
        return in_filename[-12:-8], date

    data_list = []
    for nav, obs in zip(nav_files, obs_files):
        site, date = getSiteAndDate(nav)
        if (site, date) != getSiteAndDate(obs):
            raise RuntimeError('Data mismatch')
        # data_list.append([site, date, readRinexNav(nav), rinexobs(obs)])
        data_list.append([site, date, nav, obs])

    return DataWrapper(data_list)
def __init__(self, ap_paramList=[], start_date=None, end_date=None, cutoff=0.75):
    '''
    Construct a Groundwater Data Fetcher

    @param ap_paramList[LowerLat]: Autoparam Lower latitude
    @param ap_paramList[UpperLat]: Autoparam Upper latitude
    @param ap_paramList[LeftLon]: Autoparam Left longitude
    @param ap_paramList[RightLon]: Autoparam Right longitude
    @param start_date: Starting date (default: None)
    @param end_date: Ending date (default: None)
    @param cutoff: Required amount of data for each station
    '''
    self.start_date = pd.to_datetime(start_date)
    self.end_date = pd.to_datetime(end_date)
    self.ap_paramList = ap_paramList
    self.cutoff = cutoff
def convert_date(in_date):
    '''
    Converts an input string to a pandas datetime; other types of objects are returned unchanged.

    @param in_date: Input date

    @return pandas datetime object
    '''
    if isinstance(in_date, str):
        try:
            return pd.to_datetime(in_date)
        except ValueError as e:
            return pd.to_datetime(in_date, format='%Y%j')
    else:
        return in_date
def QA_fetch_get_security_bars(code, _type, lens, ip=best_ip['stock'], port=7709):
    api = TdxHq_API()
    with api.connect(ip, port):
        data = pd.concat([api.to_df(api.get_security_bars(
            _select_type(_type), _select_market_code(code),
            code, (i - 1) * 800, 800)) for i in range(1, int(lens / 800) + 2)], axis=0)
        data = data\
            .assign(datetime=pd.to_datetime(data['datetime']), code=str(code))\
            .drop(['year', 'month', 'day', 'hour', 'minute'], axis=1, inplace=False)\
            .assign(date=data['datetime'].apply(lambda x: str(x)[0:10]))\
            .assign(date_stamp=data['datetime'].apply(lambda x: QA_util_date_stamp(x)))\
            .assign(time_stamp=data['datetime'].apply(lambda x: QA_util_time_stamp(x)))\
            .assign(type=_type).set_index('datetime', drop=False, inplace=False).tail(lens)
        if data is not None:
            return data
        else:
            return None
def QA_fetch_get_stock_xdxr(code, ip=best_ip['stock'], port=7709):
    '''Fetch ex-dividend / ex-rights (xdxr) info for a stock'''
    api = TdxHq_API()
    market_code = _select_market_code(code)
    with api.connect(ip, port):
        category = {
            '1': '????', '2': '?????', '3': '??????', '4': '??????',
            '5': '????', '6': '????', '7': '????', '8': '??????',
            '9': '?????', '10': '?????', '11': '???', '12': '??????',
            '13': '?????', '14': '?????'}
        data = api.to_df(api.get_xdxr_info(market_code, code))
        if len(data) >= 1:
            data = data\
                .assign(date=pd.to_datetime(data[['year', 'month', 'day']]))\
                .drop(['year', 'month', 'day'], axis=1)\
                .assign(category_meaning=data['category'].apply(lambda x: category[str(x)]))\
                .assign(code=str(code))\
                .rename(index=str, columns={'panhouliutong': 'liquidity_after',
                                            'panqianliutong': 'liquidity_before',
                                            'houzongguben': 'shares_after',
                                            'qianzongguben': 'shares_before'})\
                .set_index('date', drop=False, inplace=False)
            return data.assign(date=data['date'].apply(lambda x: str(x)[0:10]))
        else:
            return None
def QA_data_stock_to_fq(__data, type_='01'):
    def __QA_fetch_stock_xdxr(code, format_='pd',
                              collections=QA_Setting.client.quantaxis.stock_xdxr):
        '''Fetch the stock's ex-dividend / ex-rights (xdxr) records'''
        try:
            data = pd.DataFrame([item for item in collections.find(
                {'code': code})]).drop(['_id'], axis=1)
            data['date'] = pd.to_datetime(data['date'])
            return data.set_index(['date', 'code'], drop=False)
        except:
            return pd.DataFrame(columns=['category', 'category_meaning', 'code', 'date',
                                         'fenhong', 'fenshu', 'liquidity_after',
                                         'liquidity_before', 'name', 'peigu', 'peigujia',
                                         'shares_after', 'shares_before', 'songzhuangu',
                                         'suogu', 'xingquanjia'])

    # Forward ('01'/'qfq') or backward ('02'/'hfq') price adjustment
    if type_ in ['01', 'qfq']:
        #print(QA_data_make_qfq(__data, __QA_fetch_stock_xdxr(__data['code'][0])))
        return QA_data_make_qfq(__data, __QA_fetch_stock_xdxr(__data['code'][0]))
    elif type_ in ['02', 'hfq']:
        return QA_data_make_hfq(__data, __QA_fetch_stock_xdxr(__data['code'][0]))
    else:
        QA_util_log_info('wrong fq type! Using qfq')
        return QA_data_make_qfq(__data, __QA_fetch_stock_xdxr(__data['code'][0]))
def mfi(df):
    df['date'] = pd.to_datetime(df.date)

    fig = plt.figure(figsize=(16, 9))
    gs = GridSpec(3, 1)  # 3 rows, 1 column
    fig.suptitle(df['date'][-1:].values[0])
    fig.set_label('MFI')

    price = fig.add_subplot(gs[:2, 0])
    price.plot(df['date'], df['close'], color='blue')

    indicator = fig.add_subplot(gs[2, 0], sharex=price)
    indicator.plot(df['date'], df['mfi'], c='pink')
    indicator.plot(df['date'], [20.] * len(df['date']), c='green')
    indicator.plot(df['date'], [80.] * len(df['date']), c='orange')

    price.grid(True)
    indicator.grid(True)
    plt.tight_layout()
    plt.show()
def atr(df):
    '''
    Average True Range
    :param df:
    :return:
    '''
    df['date'] = pd.to_datetime(df.date)

    fig = plt.figure(figsize=(16, 9))
    gs = GridSpec(3, 1)  # 3 rows, 1 column
    fig.suptitle(df['date'][-1:].values[0])
    fig.set_label('ATR')

    price = fig.add_subplot(gs[:2, 0])
    price.plot(df['date'], df['close'], color='blue')

    indicator = fig.add_subplot(gs[2, 0], sharex=price)
    indicator.plot(df['date'], df['atr'], c='pink')
    # indicator.plot(df['date'], [20.]*len(df['date']), c='green')
    # indicator.plot(df['date'], [80.]*len(df['date']), c='orange')

    price.grid(True)
    indicator.grid(True)
    plt.tight_layout()
    plt.show()
def rocr(df):
    '''
    Rate of Change Ratio (ROCR)
    :param df:
    :return:
    '''
    df['date'] = pd.to_datetime(df.date)

    fig = plt.figure(figsize=(16, 9))
    gs = GridSpec(3, 1)  # 3 rows, 1 column
    fig.suptitle(df['date'][-1:].values[0])
    fig.set_label('ROCR')

    price = fig.add_subplot(gs[:2, 0])
    price.plot(df['date'], df['close'], color='blue')

    indicator = fig.add_subplot(gs[2, 0], sharex=price)
    indicator.plot(df['date'], df['rocr'], c='pink')
    # indicator.plot(df['date'], [20.]*len(df['date']), c='green')
    # indicator.plot(df['date'], [80.]*len(df['date']), c='orange')

    price.grid(True)
    indicator.grid(True)
    plt.tight_layout()
    plt.show()
def get_indicator(df, indicator):
    ret_df = df
    if 'MACD' in indicator:
        macd, macdsignal, macdhist = ta.MACD(df.close.values, fastperiod=12,
                                             slowperiod=26, signalperiod=9)
        ret_df = KlineData._merge_dataframe(
            pd.DataFrame([macd, macdsignal, macdhist]).T.rename(
                columns={0: "macddif", 1: "macddem", 2: "macdhist"}), ret_df)
        ret_df = KlineData._merge_dataframe(
            line_intersections(ret_df, columns=['macddif', 'macddem']), ret_df)

    if 'MFI' in indicator:
        real = ta.MFI(df.high.values, df.low.values, df.close.values,
                      df.volume.values, timeperiod=14)
        ret_df = KlineData._merge_dataframe(
            pd.DataFrame([real]).T.rename(columns={0: "mfi"}), ret_df)

    if 'ATR' in indicator:
        real = ta.NATR(df.high.values, df.low.values, df.close.values, timeperiod=14)
        ret_df = KlineData._merge_dataframe(
            pd.DataFrame([real]).T.rename(columns={0: "atr"}), ret_df)

    if 'ROCR' in indicator:
        real = ta.ROCR(df.close.values, timeperiod=10)
        ret_df = KlineData._merge_dataframe(
            pd.DataFrame([real]).T.rename(columns={0: "rocr"}), ret_df)

    ret_df['date'] = pd.to_datetime(ret_df['date'], format='%Y-%m-%d')
    return ret_df
def to_ns(x):
    """Convert input timestamps to nanoseconds (integers)

    :param x: value to be converted
    :returns: converted value
    :rtype: int
    """
    if pd.isnull(x):
        return 0
    try:
        return pd.to_datetime(x).value
    except:
        if hasattr(x, '__str__'):
            return pd.to_datetime(str(x)).value
    return 0
def get_twitter_sentiment_multilabel_classification_dataset():
    file_name = os.path.join('tests', 'twitter_sentiment.csv')

    try:
        df_twitter = pd.read_csv(open(file_name, 'rU'), encoding='utf-8', engine='python')
    except Exception as e:
        print('Error')
        print(e)
        dataset_url = 'https://raw.githubusercontent.com/ClimbsRocks/sample_datasets/master/twitter_airline_sentiment.csv'
        df_twitter = pd.read_csv(dataset_url)
        # Do not write the index that pandas automatically creates
        df_twitter.to_csv(file_name, index=False)

    # Grab only 10% of the dataset- runs much faster this way
    df_twitter = df_twitter.sample(frac=0.1)

    df_twitter['tweet_created'] = pd.to_datetime(df_twitter.tweet_created)

    df_twitter_train, df_twitter_test = train_test_split(df_twitter, test_size=0.33, random_state=42)
    return df_twitter_train, df_twitter_test
def add_date_features_df(df, date_col):
    # Pandas nicely tries to prevent you from doing stupid things, like setting values on a copy of a df, not your real one
    # However, it's a bit overzealous in this case, so we'll side-step a bunch of warnings by setting is_copy to false here
    df.is_copy = False

    df[date_col] = pd.to_datetime(df[date_col])

    df[date_col + '_day_of_week'] = df[date_col].apply(lambda x: x.weekday()).astype(int, raise_on_error=False)

    try:
        df[date_col + '_hour'] = df[date_col].apply(lambda x: x.hour).astype(int, raise_on_error=False)
        df[date_col + '_minutes_into_day'] = df[date_col].apply(lambda x: x.hour * 60 + x.minute)
    except AttributeError:
        pass

    df[date_col + '_is_weekend'] = df[date_col].apply(lambda x: x.weekday() in (5, 6))
    df[date_col + '_day_part'] = df[date_col + '_minutes_into_day'].apply(minutes_into_day_parts)

    df = df.drop([date_col], axis=1)

    return df

# Same logic as above, except implemented for a single dictionary, which is much faster at prediction time when getting just a single prediction
def get_utc_timestamp(dt):
    """
    Returns the Timestamp/DatetimeIndex
    with either localized or converted to UTC.

    Parameters
    ----------
    dt : Timestamp/DatetimeIndex
        the date(s) to be converted

    Returns
    -------
    same type as input
        date(s) converted to UTC
    """
    dt = pd.to_datetime(dt)
    try:
        dt = dt.tz_localize('UTC')
    except TypeError:
        dt = dt.tz_convert('UTC')
    return dt
def format_dates(self, data, columns):
    """
    This method translates column values into datetime objects

    :param data: original Pandas dataframe
    :param columns: list of columns to cast the date to a datetime object
    :type data: pandas.DataFrame
    :type columns: list of strings
    :returns: Pandas dataframe with updated 'columns' with datetime objects
    :rtype: pandas.DataFrame
    """
    for column in columns:
        if column in data.columns:
            data[column] = pandas.to_datetime(data[column])
    return data
def __init__(self, begin=None, end=None):
    """
    self.calendar looks like:

                type  weekday  next_td     tradeday    day_trade  night_trade  midnight_trade
    date
    2016-01-01  2     5        2016-01-04  2016-01-01  True       True         True
    2016-01-02  3     6        2016-01-04  2016-01-04  False      False        True

    :param begin:
    :param end:
    """
    # holidays, pd.Series, date: type
    self.holidays = self.get_holiday_json()
    # fall back to the start and end of the current year
    self.begin = begin or self.yearbegin()
    self.end = end or self.yearend()
    if self.holidays.shape[0]:
        # extend the end date into the year after the last listed holiday
        end = max(self.holidays.index)
        end = pd.to_datetime(end)
        self.end = self.end.replace(end.year + 1)
    # build the trading calendar
    self.calendar = self.getCalendar()
def ml_regression_build_prediction_test_window(self, req, num_units, rds, dbs):
    import pandas as pd

    ml_type = req["MLType"]
    target_column_name = req["TargetColumnName"]      # What column is getting processed?
    target_column_values = req["TargetColumnValues"]  # Possible values each int in the target_column_name maps to
    train_feature_names = req["TrainFeatures"]        # Pass in the features to train
    source_df = req["SourceDF"]
    sample_filter_mask = (source_df["DSName"] != "")

    new_df = source_df.iloc[-1 * int(num_units):]

    if "Date" in str(source_df.columns):
        new_df["Date"] = pd.to_datetime(new_df["Date"], format='%Y-%m-%d')    # assuming the Date column is present

    if "FDate" in str(source_df.columns):
        new_df["FDate"] = pd.to_datetime(new_df["FDate"], format='%Y-%m-%d')  # assuming the Future Date column is present

    return new_df
# end of ml_regression_build_prediction_test_window
def _to_dataframe(self, rs):
    result = {}
    if isinstance(rs, list):
        return map(self._to_dataframe, rs)

    for key, data in rs.items():
        name, tags = key
        if tags is None:
            key = name
        else:
            key = (name, tuple(sorted(tags.items())))
        df = pd.DataFrame(data)
        df.time = pd.to_datetime(df.time)
        df.set_index('time', inplace=True)
        df.index = df.index.tz_localize('UTC')
        df.index.name = None
        result[key] = df
    return result
def test_query_into_dataframe(self):
    data = [
        {
            "name": "foo",
            "columns": ["time", "sequence_number", "column_one"],
            "points": [
                [3600, 16, 2], [3600, 15, 1],
                [0, 14, 2], [0, 13, 1]
            ]
        }
    ]
    # dataframe sorted ascending by time first, then sequence_number
    dataframe = pd.DataFrame(data=[[13, 1], [14, 2], [15, 1], [16, 2]],
                             index=pd.to_datetime([0, 0, 3600, 3600],
                                                  unit='s', utc=True),
                             columns=['sequence_number', 'column_one'])
    with _mocked_session('get', 200, data):
        cli = DataFrameClient('host', 8086, 'username', 'password', 'db')
        result = cli.query('select column_one from foo;')
        assert_frame_equal(dataframe, result)
def _to_dataframe(self, json_result, time_precision):
    dataframe = pd.DataFrame(data=json_result['points'],
                             columns=json_result['columns'])
    if 'sequence_number' in dataframe.keys():
        dataframe.sort_values(['time', 'sequence_number'], inplace=True)
    else:
        dataframe.sort_values(['time'], inplace=True)

    pandas_time_unit = time_precision
    if time_precision == 'm':
        pandas_time_unit = 'ms'
    elif time_precision == 'u':
        pandas_time_unit = 'us'

    dataframe.index = pd.to_datetime(list(dataframe['time']),
                                     unit=pandas_time_unit,
                                     utc=True)
    del dataframe['time']
    return dataframe
def _convert_dataframe_to_json(self, dataframe, name, time_precision='s'):
    if not isinstance(dataframe, pd.DataFrame):
        raise TypeError('Must be DataFrame, but type was: {0}.'
                        .format(type(dataframe)))
    if not (isinstance(dataframe.index, pd.tseries.period.PeriodIndex) or
            isinstance(dataframe.index, pd.tseries.index.DatetimeIndex)):
        raise TypeError('Must be DataFrame with DatetimeIndex or '
                        'PeriodIndex.')

    if isinstance(dataframe.index, pd.tseries.period.PeriodIndex):
        dataframe.index = dataframe.index.to_timestamp()
    else:
        dataframe.index = pd.to_datetime(dataframe.index)

    if dataframe.index.tzinfo is None:
        dataframe.index = dataframe.index.tz_localize('UTC')
    dataframe['time'] = [self._datetime_to_epoch(dt, time_precision)
                         for dt in dataframe.index]
    data = {'name': name,
            'columns': [str(column) for column in dataframe.columns],
            'points': [self._convert_array(x) for x in dataframe.values]}
    return data
def parse_header(self, l_data):
    '''
    Format the list of the header passed in and return a dictionary

    :param l_data: list. list with the elements of the parsed row
    '''
    d_rtn = {}
    # Name of file
    d_rtn['name_of_file'] = l_data[1]
    # Initial date of file
    d_rtn['initial_date'] = pd.to_datetime(l_data[2], format='%Y-%m-%d')
    d_rtn['initial_date'] = d_rtn['initial_date'].date()
    # End date of file
    d_rtn['end_date'] = pd.to_datetime(l_data[3], format='%Y-%m-%d')
    d_rtn['end_date'] = d_rtn['end_date'].date()
    # Total number of lines, taken from the file's Trailer record
    d_rtn['total_of_lines'] = int(l_data[4])
    return d_rtn
def setUp(self):
    self.data = {'id': [chr(97 + c) for c in range(1, 10)],
                 'x': [50, 50, -10, 0, 0, 5, 15, -3, None],
                 'y': [0.000001, 654.152, None, 15.984512, 3122, -3.1415926535, 111, 15.9, 13.5],
                 'cat': ['a', 'long text value', u'Élysée', '', None,
                         'some <b> B.s </div> </div> HTML stuff', 'c', 'c', 'c'],
                 's1': np.ones(9),
                 's2': [u'some constant text $ % value {obj} ' for _ in range(1, 10)],
                 'somedate': [datetime.date(2011, 7, 4),
                              datetime.datetime(2022, 1, 1, 13, 57),
                              datetime.datetime(1990, 12, 9), np.nan,
                              datetime.datetime(1990, 12, 9),
                              datetime.datetime(1950, 12, 9),
                              datetime.datetime(1898, 1, 2),
                              datetime.datetime(1950, 12, 9),
                              datetime.datetime(1950, 12, 9)],
                 'bool': [True, True, False, True, False, True, True, False, True]
                 }
    self.df = pd.DataFrame(self.data)
    self.df['somedate'] = pd.to_datetime(self.df['somedate'])
    self.results = describe(self.df)
    self.test_dir = tempfile.mkdtemp()
def makeWeekly(data):
    columnList = data.columns.tolist()
    columnCount = len(columnList) - 2
    if columnCount < 1:
        sys.exit("you need at least 1 column")
    data[columnList[0]] = pd.to_datetime(data[columnList[0]])
    cl = tuple(columnList[1:-1])
    data1 = data.groupby([pd.Grouper(key=columnList[0], freq='W'), *cl], as_index=False)[columnList[-1]].sum()
    data2 = data.groupby([pd.Grouper(key=columnList[0], freq='W'), *cl])[columnList[-1]].sum()
    data1['week'] = data2.index.get_level_values(columnList[0])
    cols = data1.columns.tolist()
    cols = cols[-1:] + cols[:-1]
    data1 = data1[cols]
    return data1

#%% Create Ordering Function
def df_maker(stats, logged_user):
    symbol_slug = stats['symbol__symbol']
    period_slug = stats['period__period']
    system_slug = stats['system__title']
    broker_slug = stats['broker__slug']
    direction_slug = get_direction(stats=stats)
    broker = stats['broker__title']
    period = stats['period__name']
    symbol = stats['symbol__symbol']
    system = stats['system__title']
    meta_image = stats['img']
    heat_image = stats['heatmap']
    yearly_image = stats['yearly_ret']
    mc_image = stats['mc']

    portfolio = get_index_portfolio(logged_user=logged_user, stats=stats)

    in_file = join(settings.DATA_PATH, "performance", "{0}=={1}=={2}=={3}".format(
        broekr_slug_to_title(broker_slug=broker_slug), symbol_slug, period_slug, system_slug))
    df = nonasy_df_multi_reader(filename=in_file, limit=settings.LIMIT_ENABLED)
    df.index = to_datetime(df.index).to_pydatetime()

    return (symbol_slug, period_slug, system_slug, broker_slug, direction_slug,
            broker, period, symbol, system, meta_image, portfolio, df, heat_image,
            yearly_image, mc_image)
async def gen_time_data(df):
    t = {}
    now = date.today()
    t["ye"] = now.year
    t["mo"] = now.month
    t["to_day"] = now.day
    t["dow"] = now.weekday()
    t["prev_day"] = await get_prev_day(d=t["to_day"], mo=t["mo"])
    t["prev_mo"] = await get_prev_mo(mo=t["mo"])
    t["end_prev_day"] = [30, 31]
    df['ts'] = df.index
    df['ts'] = to_datetime(df['ts'])
    t["df_year"] = df['ts'].ix[-1].to_pydatetime().year
    t["df_month"] = df['ts'].ix[-1].to_pydatetime().month
    t["df_day"] = df['ts'].ix[-1].to_pydatetime().day
    t["df_weekday"] = df['ts'].ix[-1].to_pydatetime().weekday()
    return t, df
def make_df(resp) -> Union[pd.DataFrame, Iterable[Tuple[str, pd.DataFrame]]]:
    """Makes list of DataFrames from a response object"""

    def maker(series) -> pd.DataFrame:
        df = pd.DataFrame(series['values'], columns=series['columns'])
        df = df.set_index(pd.to_datetime(df['time'])).drop('time', axis=1)  # type: pd.DataFrame
        df.index = df.index.tz_localize('UTC')
        df.index.name = None
        if 'name' in series:
            df.name = series['name']
        return df

    df_list = [(series['name'], maker(series))
               for statement in resp['results']
               for series in statement['series']]
    if len(df_list) == 1:
        return df_list[0][1]
    else:
        return df_list
def parse_raw(filepath, seconds=1):
    '''
    Parse the raw data file at filepath into a time-indexed dataframe.

    :param filepath: path of the raw data file to parse
    :param seconds: int, sampling interval of the records, in seconds
    :return: dataframe with a datetime index and one column per measured variable
    '''
    data_head = pd.read_csv(filepath, delim_whitespace=True, header=None, nrows=1)
    data = pd.read_csv(filepath, delim_whitespace=True, header=None, skiprows=2)
    date_start = data_head.iloc[0, 3]
    time_start = data.iloc[1, 0] + ' ' + data.iloc[1, 1]
    datetime_start = pd.to_datetime(date_start + ' ' + time_start)
    columns = list(data.iloc[0, 2:])
    newdata = data.iloc[1:, 2:].applymap(convert2float)
    newdata = newdata.dropna(axis=0, how='any')
    newdata = newdata.loc[(newdata.applymap(type) == type('')).sum(axis=1) < newdata.shape[1]]
    newdata = newdata.applymap(convert2float)
    newdata.columns = columns
    newdata.index = pd.date_range(start=datetime_start, periods=newdata.shape[0], freq='%dS' % seconds)
    newdata.index.name = 'datetime'
    return newdata
def get_year_start_end(dt, first_day=None, last_day=None):
    """
    The first and last day of the year for the specified date.

    Parameters
    ----------
    dt: datetime
    first_day: datetime
    last_day: datetime

    Returns
    -------
    datetime, datetime
    """
    year_start = first_day if first_day \
        else pd.to_datetime(date(dt.year, 1, 1), utc=True)
    year_end = last_day if last_day \
        else pd.to_datetime(date(dt.year, 12, 31), utc=True)

    if year_end > pd.Timestamp.utcnow():
        year_end = pd.Timestamp.utcnow().floor('1D')

    return year_start, year_end
def test_nearest_unequal_elements(self, tz):
    dts = pd.to_datetime(
        ['2014-01-01', '2014-01-05', '2014-01-06', '2014-01-09'],
    ).tz_localize(tz)

    def t(s):
        return None if s is None else pd.Timestamp(s, tz=tz)

    for dt, before, after in (('2013-12-30', None, '2014-01-01'),
                              ('2013-12-31', None, '2014-01-01'),
                              ('2014-01-01', None, '2014-01-05'),
                              ('2014-01-02', '2014-01-01', '2014-01-05'),
                              ('2014-01-03', '2014-01-01', '2014-01-05'),
                              ('2014-01-04', '2014-01-01', '2014-01-05'),
                              ('2014-01-05', '2014-01-01', '2014-01-06'),
                              ('2014-01-06', '2014-01-05', '2014-01-09'),
                              ('2014-01-07', '2014-01-06', '2014-01-09'),
                              ('2014-01-08', '2014-01-06', '2014-01-09'),
                              ('2014-01-09', '2014-01-06', None),
                              ('2014-01-10', '2014-01-09', None),
                              ('2014-01-11', '2014-01-09', None)):
        computed = nearest_unequal_elements(dts, t(dt))
        expected = (t(before), t(after))
        self.assertEqual(computed, expected)
def test_nearest_unequal_elements_short_dts(self, tz):
    # Length 1.
    dts = pd.to_datetime(['2014-01-01']).tz_localize(tz)

    def t(s):
        return None if s is None else pd.Timestamp(s, tz=tz)

    for dt, before, after in (('2013-12-31', None, '2014-01-01'),
                              ('2014-01-01', None, None),
                              ('2014-01-02', '2014-01-01', None)):
        computed = nearest_unequal_elements(dts, t(dt))
        expected = (t(before), t(after))
        self.assertEqual(computed, expected)

    # Length 0
    dts = pd.to_datetime([]).tz_localize(tz)
    for dt, before, after in (('2013-12-31', None, None),
                              ('2014-01-01', None, None),
                              ('2014-01-02', None, None)):
        computed = nearest_unequal_elements(dts, t(dt))
        expected = (t(before), t(after))
        self.assertEqual(computed, expected)
def test_nearest_unequal_bad_input(self):
    with self.assertRaises(ValueError) as e:
        nearest_unequal_elements(
            pd.to_datetime(['2014', '2014']),
            pd.Timestamp('2014'),
        )
    self.assertEqual(str(e.exception), 'dts must be unique')

    with self.assertRaises(ValueError) as e:
        nearest_unequal_elements(
            pd.to_datetime(['2014', '2013']),
            pd.Timestamp('2014'),
        )
    self.assertEqual(
        str(e.exception),
        'dts must be sorted in increasing order',
    )
def make_expected_out(cls):
    expected = pd.DataFrame(columns=[cls.columns[col] + '1' for col in cls.columns] +
                                    [cls.columns[col] + '2' for col in cls.columns],
                            index=cls.trading_days)

    for (col, raw_name), suffix in itertools.product(
        cls.columns.items(), ('1', '2')
    ):
        expected_name = raw_name + suffix
        if col.dtype == datetime64ns_dtype:
            expected[expected_name] = pd.to_datetime(
                expected[expected_name]
            )
        else:
            expected[expected_name] = expected[
                expected_name
            ].astype(col.dtype)

    cls.fill_expected_out(expected)
    return expected.reindex(cls.trading_days)
def test_spot_value(self):
    # data_frequency = 'daily'
    # exchange_name = 'poloniex'
    # exchange = get_exchange(exchange_name)
    # exchange_bundle = ExchangeBundle(exchange)
    # assets = [
    #     exchange.get_asset('btc_usdt')
    # ]
    # dt = pd.to_datetime('2017-10-14', utc=True)
    # values = exchange_bundle.get_spot_values(
    #     assets=assets,
    #     field='close',
    #     dt=dt,
    #     data_frequency=data_frequency
    # )
    pass
def test_ingest_minute_all(self):
    exchange_name = 'bitfinex'

    # start = pd.to_datetime('2017-09-01', utc=True)
    start = pd.to_datetime('2017-10-01', utc=True)
    end = pd.to_datetime('2017-10-05', utc=True)

    exchange_bundle = ExchangeBundle(get_exchange(exchange_name))

    log.info('ingesting exchange bundle {}'.format(exchange_name))
    exchange_bundle.ingest(
        data_frequency='minute',
        exclude_symbols=None,
        start=start,
        end=end,
        show_progress=True
    )
    pass
def main_bundle_to_csv(self):
    exchange_name = 'poloniex'
    data_frequency = 'minute'

    exchange = get_exchange(exchange_name)
    asset = exchange.get_asset('eth_btc')

    start_dt = pd.to_datetime('2016-5-31', utc=True)
    end_dt = pd.to_datetime('2016-6-1', utc=True)

    self._bundle_to_csv(
        asset=asset,
        exchange_name=exchange.name,
        data_frequency=data_frequency,
        filename='{}_{}_{}'.format(
            exchange_name, data_frequency, asset.symbol
        ),
        start_dt=start_dt,
        end_dt=end_dt
    )
def test_ingest_csv(self):
    data_frequency = 'minute'
    exchange_name = 'bittrex'
    path = '/Users/fredfortier/Dropbox/Enigma/Data/bittrex_bat_eth.csv'

    exchange_bundle = ExchangeBundle(exchange_name)
    exchange_bundle.ingest_csv(path, data_frequency)

    exchange = get_exchange(exchange_name)
    asset = exchange.get_asset('bat_eth')

    start_dt = pd.to_datetime('2017-6-3', utc=True)
    end_dt = pd.to_datetime('2017-8-3 19:24', utc=True)

    self._bundle_to_csv(
        asset=asset,
        exchange_name=exchange.name,
        data_frequency=data_frequency,
        filename='{}_{}_{}'.format(
            exchange_name, data_frequency, asset.symbol
        ),
        start_dt=start_dt,
        end_dt=end_dt
    )
    pass
def test_get_candles(self):
    log.info('retrieving candles')

    assets = self.exchange.get_asset('eth_btc')

    ohlcv = self.exchange.get_candles(
        # end_dt=pd.to_datetime('2017-11-01', utc=True),
        end_dt=None,
        freq='5T',
        assets=assets,
        bar_count=200
    )

    df = pd.DataFrame(ohlcv)
    df.set_index('last_traded', drop=True, inplace=True)
    log.info(df.tail(25))

    path = output_df(df, assets, '5min_candles')
    log.info('saved candles: {}'.format(path))
    pass
def test_bcolz_write_daily_present(self):
    start = pd.to_datetime('2017-01-01')
    end = pd.to_datetime('today')
    freq = 'daily'

    df = self.generate_df('bitfinex', freq, start, end)

    writer = BcolzExchangeBarWriter(
        rootdir=self.root_dir,
        start_session=start,
        end_session=end,
        data_frequency=freq,
        write_metadata=True)

    data = []
    data.append((1, df))
    writer.write(data)
    pass
def test_bcolz_write_minute_past(self):
    start = pd.to_datetime('2015-04-01 00:00')
    end = pd.to_datetime('2015-04-30 23:59')
    freq = 'minute'

    df = self.generate_df('bitfinex', freq, start, end)

    writer = BcolzExchangeBarWriter(
        rootdir=self.root_dir,
        start_session=start,
        end_session=end,
        data_frequency=freq,
        write_metadata=True)

    data = []
    data.append((1, df))
    writer.write(data)
    pass
def test_bcolz_write_minute_present(self):
    start = pd.to_datetime('2017-10-01 00:00')
    end = pd.to_datetime('today')
    freq = 'minute'

    df = self.generate_df('bitfinex', freq, start, end)

    writer = BcolzExchangeBarWriter(
        rootdir=self.root_dir,
        start_session=start,
        end_session=end,
        data_frequency=freq,
        write_metadata=True)

    data = []
    data.append((1, df))
    writer.write(data)
    pass
def setup(self):
    log.info('creating bitfinex exchange')
    exchanges = get_exchanges(['bitfinex', 'bittrex', 'poloniex'])
    open_calendar = get_calendar('OPEN')
    asset_finder = AssetFinderExchange()

    self.data_portal_live = DataPortalExchangeLive(
        exchanges=exchanges,
        asset_finder=asset_finder,
        trading_calendar=open_calendar,
        first_trading_day=pd.to_datetime('today', utc=True)
    )

    self.data_portal_backtest = DataPortalExchangeBacktest(
        exchanges=exchanges,
        asset_finder=asset_finder,
        trading_calendar=open_calendar,
        first_trading_day=None  # will set dynamically based on assets
    )
def get_day_offset(date_vector):
    r"""Compute the day offsets between games.

    Parameters
    ----------
    date_vector : pandas.Series
        The date column.

    Returns
    -------
    day_offset : pandas.Series
        A vector of day offsets between adjacent dates.
    """
    dv = pd.to_datetime(date_vector)
    offsets = pd.to_datetime(dv) - pd.to_datetime(dv[0])
    day_offset = offsets.astype('timedelta64[D]').astype(int)
    return day_offset


#
# Function get_series_diff
#
def load_data(filename, columns, separator):
    '''
    Load the data from filename and sort it according to timestamp.
    Returns a dataframe with 3 columns: user_id, item_id, rating
    '''
    print('Load data...')
    data = pd.read_csv(filename, sep=separator, names=list(columns),
                       index_col=False, usecols=range(len(columns)))

    if 'r' not in columns:
        # Add a column of default ratings
        data['r'] = 1

    if 't' in columns:
        # sort according to the timestamp column
        if data['t'].dtype == np.int64:  # probably a timestamp
            data['t'] = pd.to_datetime(data['t'], unit='s')
        else:
            data['t'] = pd.to_datetime(data['t'])
        print('Sort data in chronological order...')
        data.sort_values('t', inplace=True)

    return data
def to_pd_dt(filename):
    return pd.to_datetime(filename, format='pageviews-%Y%m%d-%H0000')


# ## Hive Metastore
# Ibis allows us to interrogate the hive metastore. We can determine if
# databases or tables exist by using functions defined directly on the ibis
# connection.
#
# It is useful for us to determine if a database exists and then create it if
# it does not.
def row_to_tuple(row):
    timestamp = pd.to_datetime("{0}-{1}-{2} {3}:00:00".format(row.year, row.month, row.day, row.hour))
    return (row.page_name, (timestamp, row.n_views))
def dt_to_epoch_ns(dt_series):
    index = pd.to_datetime(dt_series.values)
    try:
        index = index.tz_localize('UTC')
    except TypeError:
        index = index.tz_convert('UTC')
    return index.view(np.int64)
def to_series(knowledge_dates, earning_dates):
    """
    Helper for converting a dict of strings to a Series of datetimes.

    This is just for making the test cases more readable.
    """
    return pd.Series(
        index=pd.to_datetime(knowledge_dates),
        data=pd.to_datetime(earning_dates),
    )
def _calc_minute_index(market_opens, minutes_per_day):
    minutes = np.zeros(len(market_opens) * minutes_per_day,
                       dtype='datetime64[ns]')
    deltas = np.arange(0, minutes_per_day, dtype='timedelta64[m]')
    for i, market_open in enumerate(market_opens):
        start = market_open.asm8
        minute_values = start + deltas
        start_ix = minutes_per_day * i
        end_ix = start_ix + minutes_per_day
        minutes[start_ix:end_ix] = minute_values
    return pd.to_datetime(minutes, utc=True, box=True)