Python pandas module: date_range() example source code
The following 50 code examples, extracted from open-source Python projects, illustrate how to use pandas.date_range().
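Before the project snippets, a minimal sketch of the basic call (dates and frequency chosen purely for illustration):

import pandas as pd

# ten consecutive calendar days; freq='D' is the default
idx = pd.date_range('2020-01-01', periods=10, freq='D')
# bounded form: hourly stamps between two endpoints, both inclusive
hours = pd.date_range('2020-01-01', '2020-01-02', freq='H')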
def fill_nans(df, delta=None):
"""
"""
if not delta:
dt_diff = NP.diff(df.index.values)
delta_timedelta64 = min(dt_diff)
delta_seconds = delta_timedelta64 / NP.timedelta64(1, 's')
delta = timedelta(seconds=delta_seconds)
logger.info('Using delta = {} (s)'.format(delta.total_seconds()))
index_new = PD.date_range(start=df.index[0],
end=df.index[-1],
freq=delta)
missing = sorted(set(index_new) - set(df.index))
if missing:
logger.warning('Missing time indices (filled by NaNs):')
for x in missing:
logger.warning(x)
return df.reindex(index_new, copy=False), delta
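A hedged usage sketch for the snippet above, assuming the source module's PD alias and logger are available, with a tiny invented hourly frame:

# hypothetical data: hourly index with a gap at 02:00
idx = PD.to_datetime(['2020-01-01 00:00', '2020-01-01 01:00',
                      '2020-01-01 03:00'])
df = PD.DataFrame({'v': [1.0, 2.0, 4.0]}, index=idx)
filled, delta = fill_nans(df)  # delta inferred as 1 hour; the 02:00 row is NaN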
def test_nan_filter_dataframe(self):
dates = pd.date_range('1/1/2000', periods=2, freq='B', tz='UTC')
df = pd.DataFrame(np.random.randn(2, 2),
index=dates,
columns=[4, 5])
# should be filtered
df.loc[dates[0], 4] = np.nan
# should not be filtered, should have been ffilled
df.loc[dates[1], 5] = np.nan
source = DataFrameSource(df)
event = next(source)
self.assertEqual(5, event.sid)
event = next(source)
self.assertEqual(4, event.sid)
event = next(source)
self.assertEqual(5, event.sid)
self.assertFalse(np.isnan(event.price))
def test_nan_filter_panel(self):
dates = pd.date_range('1/1/2000', periods=2, freq='B', tz='UTC')
df = pd.Panel(np.random.randn(2, 2, 2),
major_axis=dates,
items=[4, 5],
minor_axis=['price', 'volume'])
# should be filtered
df.loc[4, dates[0], 'price'] = np.nan
# should not be filtered, should have been ffilled
df.loc[5, dates[1], 'price'] = np.nan
source = DataPanelSource(df)
event = next(source)
self.assertEqual(5, event.sid)
event = next(source)
self.assertEqual(4, event.sid)
self.assertRaises(StopIteration, next, source)
def getCalendar(self):
"""
?????
:return:
"""
# ???????
tradecalendar = pd.DataFrame(data=pd.date_range(self.begin, self.end), columns=['date'])
    # classify each day as weekend/weekday and record the weekday number
types, weekdays = self._weekend_trade_day_type(tradecalendar["date"])
tradecalendar["type"] = types
tradecalendar["weekday"] = weekdays
tradecalendar["weekday"] += 1
tradecalendar = tradecalendar.set_index("date", drop=False)
    # mark holidays in the calendar
tradecalendar = self._holiday_trade_day_type(tradecalendar)
    # derive the final trading status for each day
tradecalendar = self._tradestatus(tradecalendar)
return tradecalendar
def make_features(user_id,user_df):
"""
??????
"""
print 'user_id:', user_id
power = user_df.power_consumption
assert power.index[0] == user_df.index[0]
assert len(user_df.index) == 639
new_df = pd.DataFrame(index=user_df.index.union(pd.date_range('2016-9-1','2016-9-30')))
pw_new = power.copy()
#predict 30 days and 30days for features
for d in range(60):
pw_new.index += pd.Timedelta('1D')
new_df['power#-%d'%(d+1)] = pw_new
#create 30 models
for d in range(30):
#30 days features
x_ = new_df[new_df.columns[d:30+d]]
x_['y'] = power
x_.to_csv('./features/day_model/%d/%d.csv'%(d+1,user_id))
#return x_
def make_month_features(user_id,user_df):
"""
??????
"""
print 'user_id:', user_id
power = user_df.power_consumption.copy()
assert power.index[0] == user_df.index[0]
new_df = pd.DataFrame(index=user_df.index.union(pd.date_range('2016-10-1','2016-10-31')))
pw_new = power.copy()
#predict 30 days and 30days for features
for d in range(30):
pw_new.index += pd.Timedelta('1D')
new_df['power#-%d'%(d+1)] = pw_new
#create 30 models
for d in range(31):
#30 days features
new_df['y#%d'%d] = power
power.index -= pd.Timedelta('1D')
save_month_df(new_df,user_id)
return new_df
def create_energysystem(nodes, **arguments):
"""Creates the energysystem.
Parameters
----------
nodes:
A list of entities that comprise the energy system
**arguments : key word arguments
Arguments passed from command line
"""
datetime_index = pd.date_range(arguments['--date-from'],
arguments['--date-to'],
freq='60min')
es = EnergySystem(entities=nodes,
groupings=GROUPINGS,
timeindex=datetime_index)
return es
def _from_dataset_test_variables(self):
"""The variables and coords needed for the from_dataset tests"""
variables = {
# 3d-variable
'v0': xr.Variable(('time', 'ydim', 'xdim'), np.zeros((4, 4, 4))),
# 2d-variable with time and x
'v1': xr.Variable(('time', 'xdim', ), np.zeros((4, 4))),
# 2d-variable with y and x
'v2': xr.Variable(('ydim', 'xdim', ), np.zeros((4, 4))),
# 1d-variable
'v3': xr.Variable(('xdim', ), np.zeros(4))}
coords = {
'ydim': xr.Variable(('ydim', ), np.arange(1, 5)),
'xdim': xr.Variable(('xdim', ), np.arange(4)),
'time': xr.Variable(
('time', ),
pd.date_range('1999-01-01', '1999-05-01', freq='M').values)}
return variables, coords
def make_features(locations_file='blocations.csv',
timeseries_file='burundioutput.csv',
startdate='2015-05-01'):
locations = pd.read_csv(locations_file)
timeseries = pd.read_csv(timeseries_file)
n_days = timeseries.shape[0]
# Construct an index with real dates rather than day numbers
timeseries.index = pd.date_range(startdate, periods=n_days)
features = []
for location in locations.itertuples(name='Location'):
latlon = (location.latitude, location.longitude)
loctype_by_day = get_loctype(location, timeseries.index)
population_by_day = get_population(timeseries, location.name)
data_for_location = pd.DataFrame({'loctype': loctype_by_day,
'population': population_by_day})
feature = mgj.make_gj_points(latlon, location.name, data_for_location)
features.extend(feature)
return features
def test_make_gj_points():
index = pandas.date_range('2015-3-1', periods=100)
popn = pandas.Series([n * 500 for n in range(100)], index=index)
loctype = pandas.Series((['city'] * 50) + (['conflict'] * 50), index=index)
timeseries = pandas.DataFrame({'loctype': loctype, 'population': popn})
res = make_geojson.make_gj_points((52.0, 0.0), 'Examplecamp', timeseries)
assert len(res) == 100
assert res[0]['type'] == 'Feature'
assert res[0]['properties']['start'] == '2015-03-01'
assert res[0]['properties']['end'] == '2015-03-02'
assert res[0]['properties']['loctype'] == 'city'
assert res[0]['geometry']['coordinates'] == (0.0, 52.0)
assert res[50]['properties']['loctype'] == 'conflict'
assert res[50]['properties']['start'] == '2015-04-20'
def date_range_index(self, start, end=None, by=24):
""" return a (list of) time sequence that allow indexing one or several time intervals between start and end every 'by' hours
if end is None, only one time interval of 'by' hours is returned
start and end are expected in local time
"""
if end is None:
seq = pandas.date_range(start=start, periods=by, freq='H',
tz=self.timezone.zone)
return seq.tz_convert('UTC')
else:
seq = pandas.date_range(start=start, end=end, freq='H',
tz=self.timezone.zone)
seq = seq.tz_convert('UTC')
bins = pandas.date_range(start=start, end=end, freq=str(by) + 'H',
tz=self.timezone.zone)
bins = bins.tz_convert('UTC')
return [seq[(seq >= bins[i]) & (seq < bins[i + 1])] for i in
range(len(bins) - 1)]
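A hedged sketch of driving the method above; the host class is hypothetical, and a pytz timezone is assumed since the code reads self.timezone.zone:

import pytz

class Site:
    timezone = pytz.timezone('Europe/Paris')
    date_range_index = date_range_index  # the function from the snippet above

# one 24-hour interval starting at local midnight, converted to UTC
hours = Site().date_range_index('2020-06-01')
# two 24-hour bins between two local dates
bins = Site().date_range_index('2020-06-01', end='2020-06-03', by=24)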
def __init__(self, year, seasons=None, holidays=None):
if calendar.isleap(year):
hoy = 8784
else:
hoy = 8760
self.datapath = os.path.join(os.path.dirname(__file__), 'bdew_data')
self.date_time_index = pd.date_range(
pd.datetime(year, 1, 1, 0), periods=hoy * 4, freq='15Min')
if seasons is None:
self.seasons = {
'summer1': [5, 15, 9, 14], # summer: 15.05. to 14.09
'transition1': [3, 21, 5, 14], # transition1 :21.03. to 14.05
'transition2': [9, 15, 10, 31], # transition2 :15.09. to 31.10
'winter1': [1, 1, 3, 20], # winter1: 01.01. to 20.03
'winter2': [11, 1, 12, 31], # winter2: 01.11. to 31.12
}
else:
self.seasons = seasons
self.year = year
self.slp_frame = self.all_load_profiles(self.date_time_index,
holidays=holidays)
def date_op():
start = pd.date_range('2015-01-01', periods=50)
    #print(start)
    print(type(start))
    date_list = [datetime.datetime(2017, 1, 1), datetime.datetime(2017, 1, 2), datetime.datetime(2017, 1, 3),
                 datetime.datetime(2017, 1, 4)]
    df = pd.DataFrame(np.random.randn(4), index=date_list)
    print(df)
    print(df.index[2])
    format_line()
    s_x = pd.date_range('2000-1-1', periods=1000)
    df_x = pd.DataFrame(np.arange(2000).reshape(1000, 2), index=s_x)
    print(df_x)
    print(df_x.loc['2002/09/24'])
    print(df_x[1])
    # rows can be selected by a date string
    # partial-string indexing, e.g. a whole month, via .loc (formerly .ix)
    print(df_x.loc['2001-09'])
def convert_data_to_timeseries(input_file, column, verbose=False):
# Load the input file
data = np.loadtxt(input_file, delimiter=',')
# Extract the start and end dates
start_date = str(int(data[0,0])) + '-' + str(int(data[0,1]))
end_date = str(int(data[-1,0] + 1)) + '-' + str(int(data[-1,1] % 12 + 1))
if verbose:
print "\nStart date =", start_date
print "End date =", end_date
# Create a date sequence with monthly intervals
dates = pd.date_range(start_date, end_date, freq='M')
# Convert the data into time series data
data_timeseries = pd.Series(data[:,column], index=dates)
if verbose:
print "\nTime series data:\n", data_timeseries[:10]
return data_timeseries
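For reference, the month-end behaviour of freq='M' that the snippet relies on, in isolation (dates arbitrary):

import pandas as pd

# month-end stamps between the two endpoints
pd.date_range('2008-1', '2008-6', freq='M')
# DatetimeIndex: 2008-01-31, 2008-02-29, ..., 2008-05-31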
def get_gsod_data(self, station, year):
filename_format = '/pub/data/gsod/{year}/{station}-{year}.op.gz'
lines = self._retreive_file_lines(filename_format, station, year)
dates = pd.date_range("{}-01-01 00:00".format(year),
"{}-12-31 00:00".format(year),
freq='D', tz=pytz.UTC)
series = pd.Series(None, index=dates, dtype=float)
for line in lines[1:]:
columns = line.split()
date_str = columns[2].decode('utf-8')
temp_F = float(columns[3])
temp_C = (5. / 9.) * (temp_F - 32.)
dt = pytz.UTC.localize(datetime.strptime(date_str, "%Y%m%d"))
series[dt] = temp_C
return series
def get_isd_data(self, station, year):
filename_format = '/pub/data/noaa/{year}/{station}-{year}.gz'
lines = self._retreive_file_lines(filename_format, station, year)
dates = pd.date_range("{}-01-01 00:00".format(year),
"{}-12-31 23:00".format(int(year) + 1),
freq='H', tz=pytz.UTC)
series = pd.Series(None, index=dates, dtype=float)
for line in lines:
if line[87:92].decode('utf-8') == "+9999":
temp_C = float("nan")
else:
temp_C = float(line[87:92]) / 10.
date_str = line[15:27].decode('utf-8')
# there can be multiple readings per hour, so set all to minute 0
dt = pytz.UTC.localize(datetime.strptime(date_str, "%Y%m%d%H%M")).replace(minute=0)
# only set the temp if it's the first encountered in the hour.
        if pd.isnull(series.loc[dt]):
series[dt] = temp_C
return series
def test_to_records(serializer):
data = {"value": [1, np.nan], "estimated": [True, False]}
columns = ["value", "estimated"]
index = pd.date_range('2000-01-01', periods=2, freq='D')
df = pd.DataFrame(data, index=index, columns=columns)
records = serializer.to_records(df)
assert len(records) == 2
assert records[0]["start"] == datetime(2000, 1, 1, tzinfo=pytz.UTC)
assert records[0]["value"] == 1
assert records[0]["estimated"]
assert records[1]["start"] == datetime(2000, 1, 2, tzinfo=pytz.UTC)
assert pd.isnull(records[1]["value"])
assert not records[1]["estimated"]
def test_to_records(serializer):
data = {"value": [1, np.nan], "estimated": [True, False]}
columns = ["value", "estimated"]
index = pd.date_range('2000-01-01', periods=2, freq='D')
df = pd.DataFrame(data, index=index, columns=columns)
records = serializer.to_records(df)
assert len(records) == 2
assert records[0]["end"] == datetime(2000, 1, 1, tzinfo=pytz.UTC)
assert pd.isnull(records[0]["value"])
assert not records[0]["estimated"]
assert records[1]["end"] == datetime(2000, 1, 2, tzinfo=pytz.UTC)
assert records[1]["value"] == 1
assert records[1]["estimated"]
def meter_input_daily(project_meter_input):
record_starts = pd.date_range(
'2012-01-01', periods=365 * 4, freq='D', tz=pytz.UTC)
records = [
{
"start": dt.isoformat(),
"value": 1.0,
"estimated": False
} for dt in record_starts
]
trace = _natural_gas_input(records)
trace.update({'interval': 'daily'})
meter_input = {
"type": "SINGLE_TRACE_SIMPLE_PROJECT",
"trace": trace,
"project": project_meter_input,
}
return meter_input
def meter_input_hourly(project_meter_input):
record_starts = pd.date_range(
'2012-01-01', periods=365 * 4 * 24, freq='H', tz=pytz.UTC)
records = [
{
"start": dt.isoformat(),
"value": 1.0 + dt.hour,
"estimated": False
} for dt in record_starts
]
trace = _natural_gas_input(records)
trace.update({'interval': 'hourly'})
meter_input = {
"type": "SINGLE_TRACE_SIMPLE_PROJECT",
"trace": trace,
"project": project_meter_input,
}
return meter_input
def meter_input_daily_baseline_only(project_meter_input):
record_starts = pd.date_range(
'2012-01-01', periods=365 * 1, freq='D', tz=pytz.UTC)
records = [
{
"start": dt.isoformat(),
"value": 1.0,
"estimated": False
} for dt in record_starts
]
meter_input = {
"type": "SINGLE_TRACE_SIMPLE_PROJECT",
"trace": _natural_gas_input(records),
"project": project_meter_input,
}
return meter_input
def meter_input_daily_reporting_only(project_meter_input):
record_starts = pd.date_range(
'2014-02-01', periods=365 * 1, freq='D', tz=pytz.UTC)
records = [
{
"start": dt.isoformat(),
"value": 1.0,
"estimated": False
} for dt in record_starts
]
meter_input = {
"type": "SINGLE_TRACE_SIMPLE_PROJECT",
"trace": _natural_gas_input(records),
"project": project_meter_input,
}
return meter_input
def meter_input_daily_with_period_start_end(
project_meter_input_with_period_start_end):
record_starts = pd.date_range(
'2012-01-01', periods=365 * 4, freq='D', tz=pytz.UTC)
records = [
{
"start": dt.isoformat(),
"value": 1.0,
"estimated": False
} for dt in record_starts
]
trace = _natural_gas_input(records)
trace.update({'interval': 'daily'})
meter_input = {
"type": "SINGLE_TRACE_SIMPLE_PROJECT",
"trace": trace,
"project": project_meter_input_with_period_start_end,
}
return meter_input
def meter_input_strange_interpretation(project_meter_input):
record_starts = pd.date_range(
'2012-01-01', periods=365 * 4, freq='D', tz=pytz.UTC)
records = [
{
"start": dt.isoformat(),
"value": 1.0,
"estimated": False
} for dt in record_starts
]
meter_input = {
"type": "SINGLE_TRACE_SIMPLE_PROJECT",
"trace": {
"type": "ARBITRARY_START",
"interpretation": "ELECTRICITY_CONSUMPTION_NET",
"unit": "therm",
"records": records
},
"project": project_meter_input
}
return meter_input
def trace4():
trace_length = 100
data = {
"value": [1 for _ in range(trace_length)],
"estimated": [False for _ in range(trace_length)]
}
columns = ["value", "estimated"]
index = pd.date_range(
start=datetime(2011, 1, 1, tzinfo=pytz.UTC),
periods=trace_length,
freq='D',
tz=pytz.UTC
)
df = pd.DataFrame(data, index=index, columns=columns)
return EnergyTrace("ELECTRICITY_CONSUMPTION_SUPPLIED", df, unit="KWH")
def parse_raw(filepath,seconds=1):
    '''
    Parse the raw whitespace-delimited data file at filepath into a DataFrame.
    :param filepath: path to the raw data file
    :param seconds: int, sampling interval (seconds) between consecutive rows
    :return: DataFrame with a datetime index and one column per recorded channel
    '''
data_head=pd.read_csv(filepath,delim_whitespace=True,header=None,nrows=1)
data=pd.read_csv(filepath,delim_whitespace=True,header=None,skiprows=2)
date_start=data_head.iloc[0,3]
time_start=data.iloc[1,0]+' '+data.iloc[1,1]
datetime_start=pd.to_datetime(date_start+' '+time_start)
columns=list(data.iloc[0,2:])
newdata=data.iloc[1:,2:].applymap(convert2float)
newdata=newdata.dropna(axis=0,how='any')
newdata=newdata.loc[(newdata.applymap(type)==type('')).sum(axis=1)<newdata.shape[1]]
newdata=newdata.applymap(convert2float)
newdata.columns=columns
newdata.index=pd.date_range(start=datetime_start,periods=newdata.shape[0],freq='%dS'%seconds)
newdata.index.name='datetime'
return newdata
def test_date_range_lower_freq():
cal = mcal.get_calendar("NYSE")
schedule = cal.schedule(pd.Timestamp('2017-09-05 20:00', tz='UTC'), pd.Timestamp('2017-10-23 20:00', tz='UTC'))
# cannot get date range of frequency lower than 1D
with pytest.raises(ValueError):
mcal.date_range(schedule, frequency='3D')
# instead get for 1D and convert to lower frequency
short = mcal.date_range(schedule, frequency='1D')
actual = mcal.convert_freq(short, '3D')
expected = pd.date_range('2017-09-05 20:00', '2017-10-23 20:00', freq='3D', tz='UTC')
assert_index_equal(actual, expected)
actual = mcal.convert_freq(short, '1W')
expected = pd.date_range('2017-09-05 20:00', '2017-10-23 20:00', freq='1W', tz='UTC')
assert_index_equal(actual, expected)
def get_periods_range(start_dt, end_dt, freq):
"""
Get a date range for the specified parameters.
Parameters
----------
start_dt: datetime
end_dt: datetime
freq: str
Returns
-------
DateTimeIndex
"""
if freq == 'minute':
freq = 'T'
elif freq == 'daily':
freq = 'D'
return pd.date_range(start_dt, end_dt, freq=freq)
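An illustrative call (dates arbitrary):

from datetime import datetime

idx = get_periods_range(datetime(2020, 1, 1), datetime(2020, 1, 2), 'minute')
# 1441 one-minute stamps, since pd.date_range includes both endpoints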
def test_contract_at_offset(self):
contract_sids = array([1, 2, 3, 4], dtype=int64)
start_dates = pd.date_range('2015-01-01', periods=4, tz="UTC")
contracts = deque(self.asset_finder.retrieve_all(contract_sids))
oc = OrderedContracts('FO', contracts)
    self.assertEqual(1,
                     oc.contract_at_offset(1, 0, start_dates[-1].value),
                     "Offset of 0 should return the provided sid")
    self.assertEqual(2,
                     oc.contract_at_offset(1, 1, start_dates[-1].value),
                     "Offset of 1 should return the next sid in the chain.")
    self.assertEqual(None,
                     oc.contract_at_offset(4, 1, start_dates[-1].value),
                     "Offset at the end of the chain should not crash.")
def test_next_event_indexer(self):
events = self.events
event_sids = events['sid'].values
event_dates = events['event_date'].values
event_timestamps = events['timestamp'].values
all_dates = pd.date_range('2014', '2014-01-31')
all_sids = np.unique(event_sids)
indexer = next_event_indexer(
all_dates,
all_sids,
event_dates,
event_timestamps,
event_sids,
)
# Compute expected results without knowledge of null events.
for i, sid in enumerate(all_sids):
self.check_next_event_indexer(
events,
all_dates,
sid,
indexer[:, i],
)
def force_start_end_data_to_dataframe(user, dataframe, start_date, end_date):
assert type(dataframe) == pd.DataFrame
# if dataframe contains any dates outside of start and end date ... exclude
dataframe = dataframe[start_date:end_date].asfreq('D')
index = pd.date_range(start=start_date, end=end_date, tz=user.pytz_timezone)
# blank dataframe that we know for certain holds all the right dates
dataframe_container = pd.DataFrame(index=index)
# join the dataframe with an empty one that has all the right indices ... to return a dataframe with all the right
# start and end dates
normalized_dataframe = pd.DataFrame.join(dataframe_container, dataframe)
    # Pandas is like a fine-edged sword: sometimes it cuts everything perfectly, other times you don't know its
    # power and it claws at you and takes the bamboo back. For the record, the problem is not the panda, but the trainer.
assert dataframe_container.index.size == normalized_dataframe.index.size
return normalized_dataframe
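The join-against-a-blank-index trick used above, in isolation (a minimal sketch; the user/timezone plumbing is omitted):

import pandas as pd

container = pd.DataFrame(index=pd.date_range('2020-01-01', '2020-01-10'))
partial = pd.DataFrame({'v': [1, 2]},
                       index=pd.date_range('2020-01-03', periods=2))
normalized = container.join(partial)  # 10 rows; dates without data hold NaN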
def _get_serialized_dataframe(self, supplement_name, boolean_string_name, values_to_create):
data_values = [boolean_string_name] * values_to_create
today = datetime.date.today()
periods_ago = today - datetime.timedelta(days=values_to_create - 1)
date_range = pd.date_range(periods_ago, today)
# this would be stupid if the count is off
self.assertEqual(len(data_values), len(date_range))
dataframe = pd.DataFrame(index=date_range)
dataframe[supplement_name] = data_values
# make sure there's no dynamic type conversion that can screw you
series = dataframe[supplement_name]
self.assertEqual(series[0], boolean_string_name)
serialized_dataframe = ExcelSupplementFileSerializer._sanitize_dataframe_values(dataframe)
return serialized_dataframe
def import_history(self, start_date, end_date):
dataframe_columns = RESCUETIME_EFFICIENCY_HEADERS + [PRODUCTIVITY_PULSE]
historical_df = pd.DataFrame(columns=dataframe_columns)
query_dates = pd.date_range(start=start_date, end=end_date).date
for query_date in query_dates:
response = self._get_rescuetime_efficiency_for_date(query_date)
if response.status_code != 200:
continue
efficiency_timeseries = self.get_efficiency_timeseries_from_response(response)
pulse = calculate_rescue_time_pulse_from_dataframe(efficiency_timeseries)
efficiency_timeseries[PRODUCTIVITY_PULSE] = pulse
# Update the dataframe with history
historical_df.loc[query_date] = efficiency_timeseries
# when done, update into the results
self.results = historical_df
def __init__(self, user, periods_back=30):
self.user = user
self.hour_series = range(0, 24)
historical_data_points_quantity = periods_back
end_date = timezone.now()
# use pandas to generate a nifty index of timestamps, use timezone to remove warning signals
self.date_series = pd.date_range(end=end_date, freq='D', periods=historical_data_points_quantity)
# build a series that shows the impact of what supplements/events have on sleep
self.sleep_impact_series = pd.Series(0, index=self.date_series)
self.productivity_impact_series = pd.Series(0, index=self.date_series)
self.sleep_series = self._get_random_sleep_series(self.date_series)
# Create a cache here because creating many events is very slow on Production ...
# so create a cache of commonly used Django objects and then create a bunch of events that
# need this foreign key, so we can use bulk_create
self.user_activities = {}
self.supplements = {}
def create_timeseries(starting_date, ending_date, value=0):
"""Create a Pandas Time Series with constant values.
    Parameters
----------
starting_date: str, pandas.tslib.Timestamp
The first date of the Time Series.
ending_date: str, pandas.tslib.Timestamp
The last date of the Time Series.
value: int,float
Value to add to new entries. Default is zero.
"""
timeseries_index = pd.date_range(starting_date, ending_date)
timeseries = pd.Series(value, index=timeseries_index)
return timeseries
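Usage is a one-liner (dates arbitrary):

ts = create_timeseries('2020-01-01', '2020-01-05', value=1.5)
# daily Series with five entries, all equal to 1.5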
def create_es(solver, timesteps, year):
"""
Creates a default energy system to load results into.
"""
simulation = es.Simulation(solver=solver,
timesteps=timesteps,
debug=False,
objective_options={"function": minimize_cost})
# Adding a time index to the energy system
time_index = pd.date_range('1/1/' + year,
periods=len(timesteps),
freq='H')
energysystem = es.EnergySystem(time_idx=time_index,
simulation=simulation)
return energysystem
def _hourly_range(self, init_date, time_frame):
"""
Returns DatetimeIndex trading week/s in hours.
"""
utcnow = datetime.utcnow()
tr_wk_str, tr_wk_end = self.get_trading_week(init_date)
    if tr_wk_end > utcnow:
        tr_wk_end = utcnow.replace(
            minute=0, second=0, microsecond=0)
    freq, interval_type, delta = self._data_frequency(time_frame)
    dth = pd.date_range(str(tr_wk_str), str(tr_wk_end), freq=freq)
    while len(dth) % (300 * int(time_frame[1:])) != 0:
        tr_wk_str = tr_wk_end + timedelta(**{interval_type: delta})
        if tr_wk_str < utcnow:
            tr_wk_str, tr_wk_end = self.get_trading_week(tr_wk_str)
            if tr_wk_end > utcnow:
                tr_wk_end = utcnow.replace(
                    minute=0, second=0, microsecond=0)
tr_wk_end += timedelta(hours=1)
dth = dth.append(
pd.date_range(str(tr_wk_str), str(tr_wk_end), freq=freq))
else:
break
return dth
def _daily_range(self, daily):
"""
Returns DatetimeIndex for daily values.
"""
max_bars = 299
utcnow = datetime.utcnow()
dtd = pd.DatetimeIndex([])
while daily < utcnow:
tr_wk_str, tr_wk_end = self.get_trading_week(daily)
hour = int(str(tr_wk_str.time())[:2])
daily += timedelta(days=1)
daily = daily.replace(hour=hour)
if daily >= tr_wk_end:
daily, tr_wk_end = self.get_trading_week(daily)
dtd = dtd.append(
pd.date_range(str(daily), str(daily)))
return dtd
def _monthly_range(self, last_day_of_month):
"""
Returns DatetimeIndex for monthly values.
"""
ldom = last_day_of_month
max_bars = 299
utcnow = datetime.utcnow()
dtm = pd.DatetimeIndex([])
while ldom < utcnow:
dtm = dtm.append(pd.date_range(
str(ldom), str(ldom)))
if ldom.month == 12:
ldom = ldom.replace(year=ldom.year+1, month=2, day=1)
elif ldom.month == 11:
ldom = ldom.replace(year=ldom.year+1, month=1, day=1)
else:
ldom = ldom.replace(month=ldom.month+2, day=1)
ldom -= timedelta(days=1)
ldom = ldom.replace(hour=self.new_york_offset(ldom, 22))
return dtm
def fill_in_missing_dates(df, date_col_name, other_col):
startd = df[date_col_name].values[0]
endd = df[date_col_name].values[-1]
    print(startd, endd)
    idx = pd.date_range(startd, endd)
    values = {}
    for index, row in df.iterrows():
        values[row[date_col_name]] = row[other_col]
    new_data = []
    for d in idx:
        pydate = d.to_pydatetime()
        daskey = pydate.strftime('%Y-%m-%d')
        new_data.append([daskey, values.get(daskey)])
return np.row_stack(new_data)
def fill_in_missing_dates(df, date_col_name, other_col):
startd = df[date_col_name].values[0]
endd = df[date_col_name].values[-1]
    print(startd, endd)
    idx = pd.date_range(startd, endd)
    values = {}
    for index, row in df.iterrows():
        values[row[date_col_name]] = row[other_col]
    new_data = []
    for d in idx:
        pydate = d.to_pydatetime()
        daskey = pydate.strftime('%Y-%m-%d')
        new_data.append([daskey, values.get(daskey, 0)])
return np.row_stack(new_data)
def test_daily(self):
rng = date_range('1/1/2000', '12/31/2004', freq='D')
ts = Series(np.random.randn(len(rng)), index=rng)
annual = pivot_annual(ts, 'D')
doy = ts.index.dayofyear
doy[(~isleapyear(ts.index.year)) & (doy >= 60)] += 1
for i in range(1, 367):
subset = ts[doy == i]
subset.index = [x.year for x in subset.index]
result = annual[i].dropna()
tm.assert_series_equal(result, subset, check_names=False)
self.assertEqual(result.name, i)
# check leap days
leaps = ts[(ts.index.month == 2) & (ts.index.day == 29)]
day = leaps.index.dayofyear[0]
leaps.index = leaps.index.year
leaps.name = 60
tm.assert_series_equal(annual[day].dropna(), leaps)
def market_minutes_for_day(self, stamp):
market_open, market_close = self.get_open_and_close(stamp)
return pd.date_range(market_open, market_close, freq='T')
def get_trading_days(start, end, trading_day=trading_day):
return pd.date_range(start=start.date(),
end=end.date(),
freq=trading_day).tz_localize('UTC')
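Here trading_day is presumably a custom business-day offset (zipline-style). A hedged sketch with pandas' stock business-day offset standing in for it:

import pandas as pd
from pandas.tseries.offsets import BDay

trading_day = BDay()  # stand-in; the original uses an exchange-calendar offset
days = pd.date_range(start='2020-01-01', end='2020-01-10',
                     freq=trading_day).tz_localize('UTC')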
def gen_calendars(start, stop, critical_dates):
"""
Generate calendars to use as inputs.
"""
all_dates = pd.date_range(start, stop, tz='utc')
for to_drop in map(list, powerset(critical_dates)):
# Have to yield tuples.
yield (all_dates.drop(to_drop),)
# Also test with the trading calendar.
yield (trading_days[trading_days.slice_indexer(start, stop)],)
def test_basics(self, window=10):
items = ['bar', 'baz', 'foo']
minor = ['A', 'B', 'C', 'D']
rp = MutableIndexRollingPanel(window, items, minor, cap_multiple=2)
dates = pd.date_range('2000-01-01', periods=30, tz='utc')
major_deque = deque(maxlen=window)
frames = {}
for i, date in enumerate(dates):
frame = pd.DataFrame(np.random.randn(3, 4), index=items,
columns=minor)
rp.add_frame(date, frame)
frames[date] = frame
major_deque.append(date)
result = rp.get_current()
expected = pd.Panel(frames, items=list(major_deque),
major_axis=items, minor_axis=minor)
tm.assert_panel_equal(result, expected.swapaxes(0, 1))
def setUpClass(cls):
cls.dates = dates = pd.date_range('2014-01-01', '2014-01-03')
dates = cls.dates.repeat(3)
cls.sids = sids = ord('A'), ord('B'), ord('C')
cls.df = df = pd.DataFrame({
'sid': sids * 3,
'value': (0., 1., 2., 1., 2., 3., 2., 3., 4.),
'int_value': (0, 1, 2, 1, 2, 3, 2, 3, 4),
'asof_date': dates,
'timestamp': dates,
})
cls.dshape = dshape("""
var * {
sid: ?int64,
value: ?float64,
int_value: ?int64,
asof_date: datetime,
timestamp: datetime
}
""")
cls.macro_df = df[df.sid == 65].drop('sid', axis=1)
dshape_ = OrderedDict(cls.dshape.measure.fields)
del dshape_['sid']
cls.macro_dshape = var * Record(dshape_)
cls.garbage_loader = BlazeLoader()
cls.missing_values = {'int_value': 0}