Python pandas module: to_datetime() example source code

The following 50 code examples, extracted from open-source Python projects, illustrate how to use pandas.to_datetime().
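
As a quick orientation before the project code, here is a minimal standalone sketch (our own, not taken from any of the projects below) of the to_datetime() call patterns that recur throughout:

import pandas as pd

# Parse an ISO-formatted string into a Timestamp.
print(pd.to_datetime('2017-10-14'))

# Parse Unix epoch seconds; unit= controls how numbers are interpreted.
print(pd.to_datetime(1507939200, unit='s'))      # 2017-10-14 00:00:00

# Parse with an explicit format string (here: year plus day-of-year).
print(pd.to_datetime('2015123', format='%Y%j'))  # 2015-05-03 00:00:00

# Parse a whole list at once into a tz-aware DatetimeIndex.
print(pd.to_datetime(['2017-01-01', '2017-01-02'], utc=True))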

Project: sensu_drive    Author: ilavender
def y_sum_by_time(x_arr, y_arr, top=None):
    df = pd.DataFrame({'Timestamp': pd.to_datetime(x_arr, unit='s'), 'Status': y_arr})
    df['Date'] = df['Timestamp'].apply(lambda x: "%d/%d/%d" % (x.day, x.month, x.year))
    df['Hour'] = df['Timestamp'].apply(lambda x: "%d" % (x.hour))
    df['Weekday'] = df['Timestamp'].apply(lambda x: "%s" % (x.weekday_name))

    times = ['Hour', 'Weekday', 'Date']

    result = {}

    for groupby in times:

        df_group = df.groupby(groupby, as_index=False).agg({'Status': np.sum})

        if top is not None and top > 0:
            #df_group = df_group.nlargest(top, 'Status').sort(['Status', 'Hour'],ascending=False)
            # Boolean mask from the top-N rows; cells outside the top N become
            # NaN after indexing and are replaced with 'None' further down.
            idx = df_group.nlargest(top, 'Status') > 0
        else:
            idx = df_group['Status'].max() == df_group['Status']

        result[groupby] = {k: g['Status'].replace(np.nan, 'None').tolist() for k,g in df_group[idx].groupby(groupby)}

    return result
Project: scikit-dataaccess    Author: MITHaystack
def output(self):
        ''' 
        Generate data wrapper for Mahali data

        @return Mahali data wrapper
        '''
        nav_files, obs_files = self.cacheData()

        def getSiteAndDate(in_filename):
            date = pd.to_datetime('2015' + in_filename[-8:-5], format='%Y%j')
            return in_filename[-12:-8], date


        data_list = []
        for nav, obs in zip(nav_files, obs_files):
            site, date = getSiteAndDate(nav)

            if (site,date) != getSiteAndDate(obs):
                raise RuntimeError('Data mismatch')

            # data_list.append([site,date,readRinexNav(nav), rinexobs(obs)])
            data_list.append([site,date,nav, obs])

        return DataWrapper(data_list)
Project: scikit-dataaccess    Author: MITHaystack
def __init__(self,  ap_paramList = [], start_date = None, end_date = None, cutoff=0.75):
        ''' 
        Construct a Groundwater Data Fetcher

        @param ap_paramList[LowerLat]: Autoparam Lower latitude
        @param ap_paramList[UpperLat]: Autoparam Upper latitude
        @param ap_paramList[LeftLon]: Autoparam Left longitude
        @param ap_paramList[RightLon]: Autoparam Right longitude
        @param start_date: Starting date (default: None)
        @param end_date: Ending date (default: None)
        @param cutoff: Required amount of data for each station
        '''



        self.start_date   = pd.to_datetime(start_date)
        self.end_date     = pd.to_datetime(end_date)
        self.ap_paramList = ap_paramList
        self.cutoff = cutoff
Project: scikit-dataaccess    Author: MITHaystack
def convert_date(in_date):
    '''
    Converts an input string to a pandas datetime; other object types pass through unchanged

    @param in_date: Input date

    @return pandas datetime object
    '''
    if isinstance(in_date,str):
        try:
            return pd.to_datetime(in_date)
        except ValueError as e:
            return pd.to_datetime(in_date, format='%Y%j')

    else:
        return in_date
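
A quick illustration of the fallback in convert_date: year + day-of-year strings (as in the Mahali filenames above) need the explicit format, since default parsing either fails or guesses wrong:

import pandas as pd

# Day 300 of 2015 in '%Y%j' form.
print(pd.to_datetime('2015300', format='%Y%j'))  # 2015-10-27 00:00:00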
Project: QUANTAXIS    Author: yutiansut
def QA_fetch_get_security_bars(code, _type, lens, ip=best_ip['stock'], port=7709):
    api = TdxHq_API()
    with api.connect(ip, port):
        data = pd.concat([api.to_df(api.get_security_bars(_select_type(_type), _select_market_code(
            code), code, (i - 1) * 800, 800)) for i in range(1, int(lens / 800) + 2)], axis=0)
        data = data\
            .assign(datetime=pd.to_datetime(data['datetime']), code=str(code))\
            .drop(['year', 'month', 'day', 'hour', 'minute'], axis=1, inplace=False)\
            .assign(date=data['datetime'].apply(lambda x: str(x)[0:10]))\
            .assign(date_stamp=data['datetime'].apply(lambda x: QA_util_date_stamp(x)))\
            .assign(time_stamp=data['datetime'].apply(lambda x: QA_util_time_stamp(x)))\
            .assign(type=_type).set_index('datetime', drop=False, inplace=False).tail(lens)
        if data is not None:
            return data
        else:
            return None
Project: QUANTAXIS    Author: yutiansut
def QA_fetch_get_stock_xdxr(code, ip=best_ip['stock'], port=7709):
    'ex-rights and ex-dividend (xdxr) info'
    api = TdxHq_API()
    market_code = _select_market_code(code)
    with api.connect(ip, port):
        category = {
            '1': 'ex-rights & ex-dividend', '2': 'bonus/rights shares listed', '3': 'non-tradable shares listed',
            '4': 'unknown share-capital change', '5': 'share-capital change', '6': 'additional share issuance',
            '7': 'share buyback', '8': 'additionally issued shares listed', '9': 'transferred allotted shares listed',
            '10': 'convertible bonds listed', '11': 'share expansion/contraction', '12': 'non-tradable share contraction',
            '13': 'call warrants issued', '14': 'put warrants issued'}
        data = api.to_df(api.get_xdxr_info(market_code, code))
        if len(data) >= 1:
            data = data\
                .assign(date=pd.to_datetime(data[['year', 'month', 'day']]))\
                .drop(['year', 'month', 'day'], axis=1)\
                .assign(category_meaning=data['category'].apply(lambda x: category[str(x)]))\
                .assign(code=str(code))\
                .rename(index=str, columns={'panhouliutong': 'liquidity_after',
                                            'panqianliutong': 'liquidity_before', 'houzongguben': 'shares_after',
                                            'qianzongguben': 'shares_before'})\
                .set_index('date', drop=False, inplace=False)
            return data.assign(date=data['date'].apply(lambda x: str(x)[0:10]))
        else:
            return None
Project: QUANTAXIS    Author: yutiansut
def QA_data_stock_to_fq(__data, type_='01'):

    def __QA_fetch_stock_xdxr(code, format_='pd', collections=QA_Setting.client.quantaxis.stock_xdxr):
        'fetch stock xdxr (ex-rights/ex-dividend) info/data'
        try:
            data = pd.DataFrame([item for item in collections.find(
                {'code': code})]).drop(['_id'], axis=1)
            data['date'] = pd.to_datetime(data['date'])
            return data.set_index(['date', 'code'], drop=False)
        except Exception:
            return pd.DataFrame(columns=['category', 'category_meaning', 'code', 'date', 'fenhong',
                                         'fenshu', 'liquidity_after', 'liquidity_before', 'name', 'peigu', 'peigujia',
                                         'shares_after', 'shares_before', 'songzhuangu', 'suogu', 'xingquanjia'])
    'dispatch qfq (forward-adjusted) / hfq (backward-adjusted) processing'
    if type_ in ['01', 'qfq']:
        #print(QA_data_make_qfq(__data, __QA_fetch_stock_xdxr(__data['code'][0])))
        return QA_data_make_qfq(__data, __QA_fetch_stock_xdxr(__data['code'][0]))
    elif type_ in ['02', 'hfq']:
        return QA_data_make_hfq(__data, __QA_fetch_stock_xdxr(__data['code'][0]))
    else:
        QA_util_log_info('wrong fq type! Using qfq')
        return QA_data_make_qfq(__data, __QA_fetch_stock_xdxr(__data['code'][0]))
Project: klineyes    Author: tenstone
def mfi(df):
    df['date'] = pd.to_datetime(df.date)

    fig = plt.figure(figsize=(16, 9))
    gs = GridSpec(3, 1) # 3 rows, 1 column
    fig.suptitle(df['date'][-1:].values[0])
    fig.set_label('MFI')
    price = fig.add_subplot(gs[:2, 0])
    price.plot(df['date'], df['close'], color='blue')

    indicator = fig.add_subplot(gs[2, 0], sharex=price)
    indicator.plot(df['date'], df['mfi'], c='pink')
    indicator.plot(df['date'], [20.]*len(df['date']), c='green')
    indicator.plot(df['date'], [80.]*len(df['date']), c='orange')

    price.grid(True)
    indicator.grid(True)
    plt.tight_layout()
    plt.show()
Project: klineyes    Author: tenstone
def atr(df):
    '''
    Average True Range
    :param df:
    :return:
    '''
    df['date'] = pd.to_datetime(df.date)

    fig = plt.figure(figsize=(16, 9))
    gs = GridSpec(3, 1) # 3 rows, 1 column
    fig.suptitle(df['date'][-1:].values[0])
    fig.set_label('ATR')
    price = fig.add_subplot(gs[:2, 0])
    price.plot(df['date'], df['close'], color='blue')

    indicator = fig.add_subplot(gs[2, 0], sharex=price)
    indicator.plot(df['date'], df['atr'], c='pink')
    # indicator.plot(df['date'], [20.]*len(df['date']), c='green')
    # indicator.plot(df['date'], [80.]*len(df['date']), c='orange')

    price.grid(True)
    indicator.grid(True)
    plt.tight_layout()
    plt.show()
Project: klineyes    Author: tenstone
def rocr(df):
    '''
    Rate of Change Ratio
    :param df:
    :return:
    '''
    df['date'] = pd.to_datetime(df.date)

    fig = plt.figure(figsize=(16, 9))
    gs = GridSpec(3, 1) # 3 rows, 1 column
    fig.suptitle(df['date'][-1:].values[0])
    fig.set_label('ROCR')
    price = fig.add_subplot(gs[:2, 0])
    price.plot(df['date'], df['close'], color='blue')

    indicator = fig.add_subplot(gs[2, 0], sharex=price)
    indicator.plot(df['date'], df['rocr'], c='pink')
    # indicator.plot(df['date'], [20.]*len(df['date']), c='green')
    # indicator.plot(df['date'], [80.]*len(df['date']), c='orange')

    price.grid(True)
    indicator.grid(True)
    plt.tight_layout()
    plt.show()
Project: klineyes    Author: tenstone
def get_indicator(df, indicator):
        ret_df = df
        if 'MACD' in indicator:
            macd, macdsignal, macdhist = ta.MACD(df.close.values, fastperiod=12, slowperiod=26, signalperiod=9)
            ret_df = KlineData._merge_dataframe(pd.DataFrame([macd, macdsignal, macdhist]).T.rename(columns={0: "macddif", 1: "macddem", 2: "macdhist"}), ret_df)
            ret_df = KlineData._merge_dataframe(line_intersections(ret_df, columns=['macddif', 'macddem']), ret_df)
        if 'MFI' in indicator:
            real = ta.MFI(df.high.values, df.low.values, df.close.values, df.volume.values, timeperiod=14)
            ret_df = KlineData._merge_dataframe(pd.DataFrame([real]).T.rename(columns={0: "mfi"}), ret_df)
        if 'ATR' in indicator:
            real = ta.NATR(df.high.values, df.low.values, df.close.values, timeperiod=14)
            ret_df = KlineData._merge_dataframe(pd.DataFrame([real]).T.rename(columns={0: "atr"}), ret_df)
        if 'ROCR' in indicator:
            real = ta.ROCR(df.close.values, timeperiod=10)
            ret_df = KlineData._merge_dataframe(pd.DataFrame([real]).T.rename(columns={0: "rocr"}), ret_df)
        ret_df['date'] = pd.to_datetime(ret_df['date'], format='%Y-%m-%d')
        return ret_df
Project: Eskapade    Author: KaveIO
def to_ns(x):
    """Convert input timestamps to nanoseconds (integers)

    :param x: value to be converted
    :returns: converted value
    :rtype: int
    """

    if pd.isnull(x):
        return 0
    try:
        return pd.to_datetime(x).value
    except Exception:
        if hasattr(x, '__str__'):
            return pd.to_datetime(str(x)).value
    return 0
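
The .value attribute used in to_ns is the Timestamp's integer offset from the Unix epoch in nanoseconds; a minimal standalone check:

import pandas as pd

ts = pd.to_datetime('1970-01-01 00:00:01')
print(ts.value)  # 1000000000 -- one second is 1e9 nanoseconds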
Project: auto_ml    Author: doordash
def get_twitter_sentiment_multilabel_classification_dataset():

    file_name = os.path.join('tests', 'twitter_sentiment.csv')

    try:
        df_twitter = pd.read_csv(open(file_name,'rU'), encoding='utf-8', engine='python')
    except Exception as e:
        print('Error')
        print(e)
        dataset_url = 'https://raw.githubusercontent.com/ClimbsRocks/sample_datasets/master/twitter_airline_sentiment.csv'
        df_twitter = pd.read_csv(dataset_url)
        # Do not write the index that pandas automatically creates

        df_twitter.to_csv(file_name, index=False)

    # Grab only 10% of the dataset- runs much faster this way
    df_twitter = df_twitter.sample(frac=0.1)

    df_twitter['tweet_created'] = pd.to_datetime(df_twitter.tweet_created)

    df_twitter_train, df_twitter_test = train_test_split(df_twitter, test_size=0.33, random_state=42)
    return df_twitter_train, df_twitter_test
Project: auto_ml    Author: doordash
def add_date_features_df(df, date_col):
    # Pandas nicely tries to prevent you from doing stupid things, like setting values on a copy of a df, not your real one
    # However, it's a bit overzealous in this case, so we'll side-step a bunch of warnings by setting is_copy to false here
    df.is_copy = False

    df[date_col] = pd.to_datetime(df[date_col])
    df[date_col + '_day_of_week'] = df[date_col].apply(lambda x: x.weekday()).astype(int, raise_on_error=False)

    try:
        df[date_col + '_hour'] = df[date_col].apply(lambda x: x.hour).astype(int, raise_on_error=False)

        df[date_col + '_minutes_into_day'] = df[date_col].apply(lambda x: x.hour * 60 + x.minute)
    except AttributeError:
        pass

    df[date_col + '_is_weekend'] = df[date_col].apply(lambda x: x.weekday() in (5,6))
    df[date_col + '_day_part'] = df[date_col + '_minutes_into_day'].apply(minutes_into_day_parts)

    df = df.drop([date_col], axis=1)

    return df

# Same logic as above, except implemented for a single dictionary, which is much faster at prediction time when getting just a single prediction
Project: empyrical    Author: quantopian
def get_utc_timestamp(dt):
    """
    Returns the Timestamp/DatetimeIndex
    localized to or converted to UTC.
    Parameters
    ----------
    dt : Timestamp/DatetimeIndex
        the date(s) to be converted
    Returns
    -------
    same type as input
        date(s) converted to UTC
    """

    dt = pd.to_datetime(dt)
    try:
        dt = dt.tz_localize('UTC')
    except TypeError:
        dt = dt.tz_convert('UTC')
    return dt
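
The try/except in get_utc_timestamp works because tz_localize raises TypeError on an already tz-aware object, in which case tz_convert is the right call; a standalone sketch of both branches:

import pandas as pd

naive = pd.Timestamp('2017-01-01 12:00')                   # no timezone
aware = pd.Timestamp('2017-01-01 12:00', tz='US/Eastern')  # tz-aware

print(naive.tz_localize('UTC'))  # attaches UTC: 2017-01-01 12:00:00+00:00
print(aware.tz_convert('UTC'))   # converts:     2017-01-01 17:00:00+00:00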
Project: ceres    Author: dicortazar
def format_dates(self, data, columns):
        """ This method translates columns values into datetime objects

        :param data: original Pandas dataframe
        :param columns: list of columns to cast the date to a datetime object
        :type data: pandas.DataFrame
        :type columns: list of strings

        :returns: Pandas dataframe with updated 'columns' with datetime objects
        :rtype: pandas.DataFrame
        """

        for column in columns:
            if column in data.columns:
                data[column] = pandas.to_datetime(data[column])

        return data
Project: slaveo    Author: lamter
def __init__(self, begin=None, end=None):
        """
        self.calendar looks like:
            type  weekday    next_td   tradeday day_trade night_trade  midnight_trade
date
2016-01-01     2        5 2016-01-04 2016-01-01      True        True           True
2016-01-02     3        6 2016-01-04 2016-01-04     False       False           True

        :param begin:
        :param end:
        """

        # holiday data, pd.Series of date: type
        self.holidays = self.get_holiday_json()

        self.begin = begin or self.yearbegin()
        self.end = end or self.yearend()  # defaults to Jan 10 of the next year
        if self.holidays.shape[0]:
            end = max(self.holidays.index)
            end = pd.to_datetime(end)
            self.end = self.end.replace(end.year + 1)

        # trading calendar
        self.calendar = self.getCalendar()
Project: sci-pype    Author: jay-johnson
def ml_regression_build_prediction_test_window(self, req, num_units, rds, dbs):

        import pandas as pd

        ml_type                 = req["MLType"]
        target_column_name      = req["TargetColumnName"]   # What column is getting processed?
        target_column_values    = req["TargetColumnValues"] # Possible values each int in the target_column_name maps to
        train_feature_names     = req["TrainFeatures"]      # Pass in the features to train
        source_df               = req["SourceDF"]
        sample_filter_mask      = (source_df["DSName"] != "")

        new_df                  = source_df.iloc[-1 * int(num_units):]

        if "Date" in str(source_df.columns):
            new_df["Date"]      = pd.to_datetime(new_df["Date"], format='%Y-%m-%d')  # assuming the Date column is present
        if "FDate" in str(source_df.columns):
            new_df["FDate"]     = pd.to_datetime(new_df["FDate"], format='%Y-%m-%d') # assuming the Future Date column is present

        last_row    = new_df.iloc[-1]

        return new_df
    # end of ml_regression_build_prediction_test_window
Project: Dshield    Author: ywjt
def _to_dataframe(self, rs):
        result = {}
        if isinstance(rs, list):
            return map(self._to_dataframe, rs)
        for key, data in rs.items():
            name, tags = key
            if tags is None:
                key = name
            else:
                key = (name, tuple(sorted(tags.items())))
            df = pd.DataFrame(data)
            df.time = pd.to_datetime(df.time)
            df.set_index('time', inplace=True)
            df.index = df.index.tz_localize('UTC')
            df.index.name = None
            result[key] = df
        return result
Project: Dshield    Author: ywjt
def test_query_into_dataframe(self):
        data = [
            {
                "name": "foo",
                "columns": ["time", "sequence_number", "column_one"],
                "points": [
                    [3600, 16, 2], [3600, 15, 1],
                    [0, 14, 2], [0, 13, 1]
                ]
            }
        ]
        # dataframe sorted ascending by time first, then sequence_number
        dataframe = pd.DataFrame(data=[[13, 1], [14, 2], [15, 1], [16, 2]],
                                 index=pd.to_datetime([0, 0,
                                                      3600, 3600],
                                                      unit='s', utc=True),
                                 columns=['sequence_number', 'column_one'])
        with _mocked_session('get', 200, data):
            cli = DataFrameClient('host', 8086, 'username', 'password', 'db')
            result = cli.query('select column_one from foo;')
            assert_frame_equal(dataframe, result)
Project: Dshield    Author: ywjt
def _to_dataframe(self, json_result, time_precision):
        dataframe = pd.DataFrame(data=json_result['points'],
                                 columns=json_result['columns'])
        if 'sequence_number' in dataframe.keys():
            dataframe.sort_values(['time', 'sequence_number'], inplace=True)
        else:
            dataframe.sort_values(['time'], inplace=True)
        pandas_time_unit = time_precision
        if time_precision == 'm':
            pandas_time_unit = 'ms'
        elif time_precision == 'u':
            pandas_time_unit = 'us'
        dataframe.index = pd.to_datetime(list(dataframe['time']),
                                         unit=pandas_time_unit,
                                         utc=True)
        del dataframe['time']
        return dataframe
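
The precision mapping above exists because to_datetime only understands pandas-style epoch units such as 's', 'ms', 'us' and 'ns', not InfluxDB's 'm'/'u' codes; the same instant parsed at two precisions:

import pandas as pd

print(pd.to_datetime(1500000000, unit='s'))      # 2017-07-14 02:40:00
print(pd.to_datetime(1500000000000, unit='ms'))  # same instant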
Project: Dshield    Author: ywjt
def _convert_dataframe_to_json(self, dataframe, name, time_precision='s'):
        if not isinstance(dataframe, pd.DataFrame):
            raise TypeError('Must be DataFrame, but type was: {0}.'
                            .format(type(dataframe)))
        if not (isinstance(dataframe.index, pd.tseries.period.PeriodIndex) or
                isinstance(dataframe.index, pd.tseries.index.DatetimeIndex)):
            raise TypeError('Must be DataFrame with DatetimeIndex or \
                            PeriodIndex.')

        if isinstance(dataframe.index, pd.tseries.period.PeriodIndex):
            dataframe.index = dataframe.index.to_timestamp()
        else:
            dataframe.index = pd.to_datetime(dataframe.index)

        if dataframe.index.tzinfo is None:
            dataframe.index = dataframe.index.tz_localize('UTC')
        dataframe['time'] = [self._datetime_to_epoch(dt, time_precision)
                             for dt in dataframe.index]
        data = {'name': name,
                'columns': [str(column) for column in dataframe.columns],
                'points': [self._convert_array(x) for x in dataframe.values]}
        return data
Project: rl_trading    Author: ucaiado
def parse_header(self, l_data):
        '''
        Format the parsed header list and return a dictionary

        :param l_data: list. list with the elements of the parsed row
        '''
        d_rtn = {}
        # Name of file
        d_rtn['name_of_file'] = l_data[1]
        # Initial date of file
        d_rtn['initial_date'] = pd.to_datetime(l_data[2], format='%Y-%m-%d')
        d_rtn['initial_date'] = d_rtn['initial_date'].date()
        # End date of file
        d_rtn['end_date'] = pd.to_datetime(l_data[3], format='%Y-%m-%d')
        d_rtn['end_date'] = d_rtn['end_date'].date()
        # Contain the total of lines when the file Trailer record
        d_rtn['total_of_lines'] = int(l_data[4])

        return d_rtn
Project: pandas-profiling    Author: JosPolfliet
def setUp(self):
        self.data = {'id': [chr(97 + c) for c in range(1, 10)],
                     'x': [50, 50, -10, 0, 0, 5, 15, -3, None],
                     'y': [0.000001, 654.152, None, 15.984512, 3122, -3.1415926535, 111, 15.9, 13.5],
                     'cat': ['a', 'long text value', u'Élysée', '', None, 'some <b> B.s </div> </div> HTML stuff', 'c',
                             'c',
                             'c'],
                     's1': np.ones(9),
                     's2': [u'some constant text $ % value {obj} ' for _ in range(1, 10)],
                     'somedate': [datetime.date(2011, 7, 4), datetime.datetime(2022, 1, 1, 13, 57),
                                  datetime.datetime(1990, 12, 9), np.nan,
                                  datetime.datetime(
                                      1990, 12, 9), datetime.datetime(1950, 12, 9),
                                  datetime.datetime(1898, 1, 2), datetime.datetime(1950, 12, 9), datetime.datetime(1950, 12, 9)],
                     'bool': [True, True, False, True, False, True, True, False, True]
                     }
        self.df = pd.DataFrame(self.data)
        self.df['somedate'] = pd.to_datetime(self.df['somedate'])

        self.results = describe(self.df)
        self.test_dir = tempfile.mkdtemp()
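
Worth noting in this fixture: to_datetime maps the np.nan entry to NaT and coerces the mixed date/datetime objects into a single datetime64[ns] column, e.g.:

import datetime
import numpy as np
import pandas as pd

s = pd.Series([datetime.date(2011, 7, 4), np.nan,
               datetime.datetime(1990, 12, 9)])
print(pd.to_datetime(s))  # 2011-07-04, NaT, 1990-12-09 as datetime64[ns]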
Project: htsprophet    Author: CollinRooney12
def makeWeekly(data):
    columnList = data.columns.tolist()
    columnCount = len(columnList)-2
    if columnCount < 1:
        sys.exit("you need at least 1 column")
    data[columnList[0]] = pd.to_datetime(data[columnList[0]])
    cl = tuple(columnList[1:-1])
    data1 = data.groupby([pd.Grouper(key = columnList[0], freq='W'), *cl], as_index = False)[columnList[-1]].sum()
    data2 = data.groupby([pd.Grouper(key = columnList[0], freq='W'), *cl])[columnList[-1]].sum()
    data1['week'] = data2.index.get_level_values(columnList[0])
    cols = data1.columns.tolist()
    cols = cols[-1:] + cols[:-1]
    data1 = data1[cols]
    return data1
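
The key construct in makeWeekly is pd.Grouper(key=..., freq='W'), which bins the parsed datetime column into weekly buckets inside a groupby; a minimal sketch with made-up data:

import pandas as pd

df = pd.DataFrame({
    'date': pd.to_datetime(['2017-01-02', '2017-01-03', '2017-01-10']),
    'sales': [1, 2, 4],
})
# Sum per week (buckets end on Sunday by default).
print(df.groupby(pd.Grouper(key='date', freq='W'))['sales'].sum())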

#%% Create Ordering Function
Project: Quantrade    Author: quant-trade
def df_maker(stats, logged_user):
    symbol_slug = stats['symbol__symbol']
    period_slug = stats['period__period']
    system_slug = stats['system__title']
    broker_slug = stats['broker__slug']
    direction_slug = get_direction(stats=stats)
    broker = stats['broker__title']
    period = stats['period__name']
    symbol = stats['symbol__symbol']
    system = stats['system__title']
    meta_image = stats['img']
    heat_image = stats['heatmap']
    yearly_image = stats['yearly_ret']
    mc_image = stats['mc']
    portfolio = get_index_portfolio(logged_user=logged_user, stats=stats)

    in_file = join(settings.DATA_PATH, "performance", "{0}=={1}=={2}=={3}".format(\
        broekr_slug_to_title(broker_slug=broker_slug), symbol_slug, period_slug, system_slug))
    df = nonasy_df_multi_reader(filename=in_file, limit=settings.LIMIT_ENABLED)
    df.index = to_datetime(df.index).to_pydatetime()

    return (symbol_slug, period_slug, system_slug, broker_slug, direction_slug, \
        broker, period, symbol, system, meta_image, portfolio, df, heat_image, \
        yearly_image, mc_image)
Project: Quantrade    Author: quant-trade
async def gen_time_data(df):  # coroutine: the get_prev_* helpers below are awaited
    t = {}
    now = date.today()
    t["ye"] = now.year
    t["mo"] = now.month
    t["to_day"] = now.day
    t["dow"] = now.weekday()
    t["prev_day"] = await get_prev_day(d=t["to_day"], mo=t["mo"])
    t["prev_mo"] = await get_prev_mo(mo=t["mo"])
    t["end_prev_day"] = [30, 31]
    df['ts'] = df.index
    df['ts'] = to_datetime(df['ts'])
    t["df_year"] = df['ts'].ix[-1].to_pydatetime().year
    t["df_month"] = df['ts'].ix[-1].to_pydatetime().month
    t["df_day"] = df['ts'].ix[-1].to_pydatetime().day
    t["df_weekday"] = df['ts'].ix[-1].to_pydatetime().weekday()

    return t, df
Project: aioinflux    Author: plugaai
def make_df(resp) -> Union[pd.DataFrame, Iterable[Tuple[str, pd.DataFrame]]]:
    """Makes list of DataFrames from a response object"""

    def maker(series) -> pd.DataFrame:
        df = pd.DataFrame(series['values'], columns=series['columns'])
        df = df.set_index(pd.to_datetime(df['time'])).drop('time', axis=1)  # type: pd.DataFrame
        df.index = df.index.tz_localize('UTC')
        df.index.name = None
        if 'name' in series:
            df.name = series['name']
        return df

    df_list = [(series['name'], maker(series))
               for statement in resp['results']
               for series in statement['series']]
    if len(df_list) == 1:
        return df_list[0][1]
    else:
        return df_list
Project: ModelFlow    Author: yuezPrincetechs
def parse_raw(filepath,seconds=1):
    '''
    Parse the raw data file at filepath into a DataFrame.
    :param filepath: path of the raw data file to parse
    :param seconds: int, sampling interval of the data in seconds
    :return: DataFrame with a datetime index and one column per measured variable
    '''
    data_head=pd.read_csv(filepath,delim_whitespace=True,header=None,nrows=1)
    data=pd.read_csv(filepath,delim_whitespace=True,header=None,skiprows=2)
    date_start=data_head.iloc[0,3]
    time_start=data.iloc[1,0]+' '+data.iloc[1,1]
    datetime_start=pd.to_datetime(date_start+' '+time_start)
    columns=list(data.iloc[0,2:])
    newdata=data.iloc[1:,2:].applymap(convert2float)
    newdata=newdata.dropna(axis=0,how='any')
    newdata=newdata.loc[(newdata.applymap(type)==type('')).sum(axis=1)<newdata.shape[1]]
    newdata=newdata.applymap(convert2float)
    newdata.columns=columns
    newdata.index=pd.date_range(start=datetime_start,periods=newdata.shape[0],freq='%dS'%seconds)
    newdata.index.name='datetime'
    return newdata
Project: catalyst    Author: enigmampc
def get_year_start_end(dt, first_day=None, last_day=None):
    """
    The first and last day of the year for the specified date.

    Parameters
    ----------

    dt: datetime
    first_day: datetime
    last_day: datetime

    Returns
    -------
    datetime, datetime

    """
    year_start = first_day if first_day \
        else pd.to_datetime(date(dt.year, 1, 1), utc=True)
    year_end = last_day if last_day \
        else pd.to_datetime(date(dt.year, 12, 31), utc=True)

    if year_end > pd.Timestamp.utcnow():
        year_end = pd.Timestamp.utcnow().floor('1D')

    return year_start, year_end
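
Two details of get_year_start_end in one standalone sketch: passing a datetime.date with utc=True yields a tz-aware midnight Timestamp, and .floor('1D') truncates "now" to midnight:

from datetime import date
import pandas as pd

print(pd.to_datetime(date(2017, 1, 1), utc=True))  # 2017-01-01 00:00:00+00:00
print(pd.Timestamp.utcnow().floor('1D'))           # today at 00:00 UTC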
Project: catalyst    Author: enigmampc
def test_nearest_unequal_elements(self, tz):

        dts = pd.to_datetime(
            ['2014-01-01', '2014-01-05', '2014-01-06', '2014-01-09'],
        ).tz_localize(tz)

        def t(s):
            return None if s is None else pd.Timestamp(s, tz=tz)

        for dt, before, after in (('2013-12-30', None, '2014-01-01'),
                                  ('2013-12-31', None, '2014-01-01'),
                                  ('2014-01-01', None, '2014-01-05'),
                                  ('2014-01-02', '2014-01-01', '2014-01-05'),
                                  ('2014-01-03', '2014-01-01', '2014-01-05'),
                                  ('2014-01-04', '2014-01-01', '2014-01-05'),
                                  ('2014-01-05', '2014-01-01', '2014-01-06'),
                                  ('2014-01-06', '2014-01-05', '2014-01-09'),
                                  ('2014-01-07', '2014-01-06', '2014-01-09'),
                                  ('2014-01-08', '2014-01-06', '2014-01-09'),
                                  ('2014-01-09', '2014-01-06', None),
                                  ('2014-01-10', '2014-01-09', None),
                                  ('2014-01-11', '2014-01-09', None)):
            computed = nearest_unequal_elements(dts, t(dt))
            expected = (t(before), t(after))
            self.assertEqual(computed, expected)
Project: catalyst    Author: enigmampc
def test_nearest_unequal_elements_short_dts(self, tz):

        # Length 1.
        dts = pd.to_datetime(['2014-01-01']).tz_localize(tz)

        def t(s):
            return None if s is None else pd.Timestamp(s, tz=tz)

        for dt, before, after in (('2013-12-31', None, '2014-01-01'),
                                  ('2014-01-01', None, None),
                                  ('2014-01-02', '2014-01-01', None)):
            computed = nearest_unequal_elements(dts, t(dt))
            expected = (t(before), t(after))
            self.assertEqual(computed, expected)

        # Length 0
        dts = pd.to_datetime([]).tz_localize(tz)
        for dt, before, after in (('2013-12-31', None, None),
                                  ('2014-01-01', None, None),
                                  ('2014-01-02', None, None)):
            computed = nearest_unequal_elements(dts, t(dt))
            expected = (t(before), t(after))
            self.assertEqual(computed, expected)
Project: catalyst    Author: enigmampc
def test_nearest_unequal_bad_input(self):
        with self.assertRaises(ValueError) as e:
            nearest_unequal_elements(
                pd.to_datetime(['2014', '2014']),
                pd.Timestamp('2014'),
            )

        self.assertEqual(str(e.exception), 'dts must be unique')

        with self.assertRaises(ValueError) as e:
            nearest_unequal_elements(
                pd.to_datetime(['2014', '2013']),
                pd.Timestamp('2014'),
            )

        self.assertEqual(
            str(e.exception),
            'dts must be sorted in increasing order',
        )
Project: catalyst    Author: enigmampc
def make_expected_out(cls):
        expected = pd.DataFrame(columns=[cls.columns[col] + '1'
                                         for col in cls.columns] +
                                        [cls.columns[col] + '2'
                                         for col in cls.columns],
                                index=cls.trading_days)

        for (col, raw_name), suffix in itertools.product(
            cls.columns.items(), ('1', '2')
        ):
            expected_name = raw_name + suffix
            if col.dtype == datetime64ns_dtype:
                expected[expected_name] = pd.to_datetime(
                    expected[expected_name]
                )
            else:
                expected[expected_name] = expected[
                    expected_name
                ].astype(col.dtype)
        cls.fill_expected_out(expected)
        return expected.reindex(cls.trading_days)
Project: catalyst    Author: enigmampc
def test_spot_value(self):
        # data_frequency = 'daily'
        # exchange_name = 'poloniex'

        # exchange = get_exchange(exchange_name)
        # exchange_bundle = ExchangeBundle(exchange)
        # assets = [
        #     exchange.get_asset('btc_usdt')
        # ]
        # dt = pd.to_datetime('2017-10-14', utc=True)

        # values = exchange_bundle.get_spot_values(
        #     assets=assets,
        #     field='close',
        #     dt=dt,
        #     data_frequency=data_frequency
        # )
        pass
Project: catalyst    Author: enigmampc
def test_ingest_minute_all(self):
        exchange_name = 'bitfinex'

        # start = pd.to_datetime('2017-09-01', utc=True)
        start = pd.to_datetime('2017-10-01', utc=True)
        end = pd.to_datetime('2017-10-05', utc=True)

        exchange_bundle = ExchangeBundle(get_exchange(exchange_name))

        log.info('ingesting exchange bundle {}'.format(exchange_name))
        exchange_bundle.ingest(
            data_frequency='minute',
            exclude_symbols=None,
            start=start,
            end=end,
            show_progress=True
        )
        pass
Project: catalyst    Author: enigmampc
def main_bundle_to_csv(self):
        exchange_name = 'poloniex'
        data_frequency = 'minute'

        exchange = get_exchange(exchange_name)
        asset = exchange.get_asset('eth_btc')

        start_dt = pd.to_datetime('2016-5-31', utc=True)
        end_dt = pd.to_datetime('2016-6-1', utc=True)
        self._bundle_to_csv(
            asset=asset,
            exchange_name=exchange.name,
            data_frequency=data_frequency,
            filename='{}_{}_{}'.format(
                exchange_name, data_frequency, asset.symbol
            ),
            start_dt=start_dt,
            end_dt=end_dt
        )
Project: catalyst    Author: enigmampc
def test_ingest_csv(self):
        data_frequency = 'minute'
        exchange_name = 'bittrex'
        path = '/Users/fredfortier/Dropbox/Enigma/Data/bittrex_bat_eth.csv'

        exchange_bundle = ExchangeBundle(exchange_name)
        exchange_bundle.ingest_csv(path, data_frequency)

        exchange = get_exchange(exchange_name)
        asset = exchange.get_asset('bat_eth')

        start_dt = pd.to_datetime('2017-6-3', utc=True)
        end_dt = pd.to_datetime('2017-8-3 19:24', utc=True)
        self._bundle_to_csv(
            asset=asset,
            exchange_name=exchange.name,
            data_frequency=data_frequency,
            filename='{}_{}_{}'.format(
                exchange_name, data_frequency, asset.symbol
            ),
            start_dt=start_dt,
            end_dt=end_dt
        )
        pass
Project: catalyst    Author: enigmampc
def test_get_candles(self):
        log.info('retrieving candles')
        assets = self.exchange.get_asset('eth_btc')
        ohlcv = self.exchange.get_candles(
            # end_dt=pd.to_datetime('2017-11-01', utc=True),
            end_dt=None,
            freq='5T',
            assets=assets,
            bar_count=200
        )
        df = pd.DataFrame(ohlcv)
        df.set_index('last_traded', drop=True, inplace=True)
        log.info(df.tail(25))

        path = output_df(df, assets, '5min_candles')
        log.info('saved candles: {}'.format(path))
        pass
Project: catalyst    Author: enigmampc
def test_bcolz_write_daily_present(self):
        start = pd.to_datetime('2017-01-01')
        end = pd.to_datetime('today')
        freq = 'daily'

        df = self.generate_df('bitfinex', freq, start, end)

        writer = BcolzExchangeBarWriter(
            rootdir=self.root_dir,
            start_session=start,
            end_session=end,
            data_frequency=freq,
            write_metadata=True)

        data = []
        data.append((1, df))
        writer.write(data)
        pass
Project: catalyst    Author: enigmampc
def test_bcolz_write_minute_past(self):
        start = pd.to_datetime('2015-04-01 00:00')
        end = pd.to_datetime('2015-04-30 23:59')
        freq = 'minute'

        df = self.generate_df('bitfinex', freq, start, end)

        writer = BcolzExchangeBarWriter(
            rootdir=self.root_dir,
            start_session=start,
            end_session=end,
            data_frequency=freq,
            write_metadata=True)

        data = []
        data.append((1, df))
        writer.write(data)

        pass
Project: catalyst    Author: enigmampc
def test_bcolz_write_minute_present(self):
        start = pd.to_datetime('2017-10-01 00:00')
        end = pd.to_datetime('today')
        freq = 'minute'

        df = self.generate_df('bitfinex', freq, start, end)

        writer = BcolzExchangeBarWriter(
            rootdir=self.root_dir,
            start_session=start,
            end_session=end,
            data_frequency=freq,
            write_metadata=True)

        data = []
        data.append((1, df))
        writer.write(data)
        pass
Project: catalyst    Author: enigmampc
def setup(self):
        log.info('creating bitfinex exchange')
        exchanges = get_exchanges(['bitfinex', 'bittrex', 'poloniex'])
        open_calendar = get_calendar('OPEN')
        asset_finder = AssetFinderExchange()

        self.data_portal_live = DataPortalExchangeLive(
            exchanges=exchanges,
            asset_finder=asset_finder,
            trading_calendar=open_calendar,
            first_trading_day=pd.to_datetime('today', utc=True)
        )

        self.data_portal_backtest = DataPortalExchangeBacktest(
            exchanges=exchanges,
            asset_finder=asset_finder,
            trading_calendar=open_calendar,
            first_trading_day=None  # will set dynamically based on assets
        )
Project: AlphaPy    Author: ScottFreeLLC
def get_day_offset(date_vector):
    r"""Compute the day offsets between games.

    Parameters
    ----------
    date_vector : pandas.Series
        The date column.

    Returns
    -------
    day_offset : pandas.Series
        A vector of day offsets between adjacent dates.

    """
    dv = pd.to_datetime(date_vector)
    offsets = pd.to_datetime(dv) - pd.to_datetime(dv[0])
    day_offset = offsets.astype('timedelta64[D]').astype(int)
    return day_offset
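
Subtracting two datetime Series gives timedeltas; the astype('timedelta64[D]') cast above truncates them to whole days (newer pandas spells this .dt.days), for instance:

import pandas as pd

dv = pd.to_datetime(pd.Series(['2017-01-01', '2017-01-03', '2017-01-10']))
print((dv - dv[0]).dt.days.tolist())  # [0, 2, 9]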


#
# Function get_series_diff
#
Project: sequence-based-recommendations    Author: rdevooght
def load_data(filename, columns, separator):
    ''' Load the data from filename and sort it according to timestamp.
    Returns a dataframe with 3 columns: user_id, item_id, rating
    '''

    print('Load data...')
    data = pd.read_csv(filename, sep=separator, names=list(columns), index_col=False, usecols=range(len(columns)))

    if 'r' not in columns:
        # Add a column of default ratings
        data['r'] = 1

    if 't' in columns:
        # sort according to the timestamp column
        if data['t'].dtype == np.int64: # probably a timestamp
            data['t'] = pd.to_datetime(data['t'], unit='s')
        else:
            data['t'] = pd.to_datetime(data['t'])
        print('Sort data in chronological order...')
        data.sort_values('t', inplace=True)

    return data
Project: py-hadoop-tutorial    Author: hougs
def to_pd_dt(filename):
    return pd.to_datetime(filename, format='pageviews-%Y%m%d-%H0000')

# ## Hive Metastore
# Ibis allows us to interrogate the Hive metastore. We can determine if
# databases or tables exist by using functions defined directly on the ibis_
# connection.
#
# It is useful for us to determine if a database exists and then create it if
#  it does not.
Project: py-hadoop-tutorial    Author: hougs
def row_to_tuple(row):
    timestamp = pd.to_datetime("{0}-{1}-{2} {3}:00:00".format(row.year,
                                                              row.month,
                                                              row.day,
                                                              row.hour))
    return (row.page_name, (timestamp, row.n_views))
Project: zipline-chinese    Author: zhanghan1990
def dt_to_epoch_ns(dt_series):
        index = pd.to_datetime(dt_series.values)
        try:
            index = index.tz_localize('UTC')
        except TypeError:
            index = index.tz_convert('UTC')

        return index.view(np.int64)
Project: zipline-chinese    Author: zhanghan1990
def to_series(knowledge_dates, earning_dates):
    """
    Helper for converting a dict of strings to a Series of datetimes.

    This is just for making the test cases more readable.
    """
    return pd.Series(
        index=pd.to_datetime(knowledge_dates),
        data=pd.to_datetime(earning_dates),
    )
Project: zipline-chinese    Author: zhanghan1990
def _calc_minute_index(market_opens, minutes_per_day):
    minutes = np.zeros(len(market_opens) * minutes_per_day,
                       dtype='datetime64[ns]')
    deltas = np.arange(0, minutes_per_day, dtype='timedelta64[m]')
    for i, market_open in enumerate(market_opens):
        start = market_open.asm8
        minute_values = start + deltas
        start_ix = minutes_per_day * i
        end_ix = start_ix + minutes_per_day
        minutes[start_ix:end_ix] = minute_values
    return pd.to_datetime(minutes, utc=True, box=True)
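
A closing note on this last example: to_datetime also accepts a raw numpy datetime64 array, and utc=True returns a tz-aware DatetimeIndex (the box=True argument used here was later deprecated and removed; boxing is now always the behavior):

import numpy as np
import pandas as pd

minutes = np.array(['2017-01-03T09:30', '2017-01-03T09:31'],
                   dtype='datetime64[ns]')
print(pd.to_datetime(minutes, utc=True))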