Python pandas module: Panel() example source code
The following code examples, extracted from open-source Python projects, illustrate how to use pandas.Panel().
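For orientation: a Panel is a 3-D container whose axes are items, major_axis, and minor_axis, and each item slice is a DataFrame. A minimal construction sketch, assuming pandas < 0.25 (pd.Panel was deprecated in 0.20 and removed in 0.25):

import numpy as np
import pandas as pd  # requires pandas < 0.25, where pd.Panel still exists

# A 2 x 3 x 4 block: 2 items, 3 dates, 4 columns.
dates = pd.date_range('2000-01-01', periods=3)
panel = pd.Panel(np.random.randn(2, 3, 4),
                 items=['Item1', 'Item2'],
                 major_axis=dates,
                 minor_axis=['A', 'B', 'C', 'D'])
print(panel.shape)     # (2, 3, 4)
print(panel['Item1'])  # each item is a DataFrame (major_axis x minor_axis)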
def aggregate_ohlcv_panel(self,
fields,
ohlcv_panel,
items=None,
minor_axis=None):
"""
Convert an OHLCV Panel into a DataFrame by aggregating each field's
frame into a Series.
"""
vals = ohlcv_panel
if isinstance(ohlcv_panel, pd.Panel):
vals = ohlcv_panel.values
items = ohlcv_panel.items
minor_axis = ohlcv_panel.minor_axis
data = [
self.frame_to_series(
field,
vals[items.get_loc(field)],
minor_axis
)
for field in fields
]
return np.array(data)
def test_nan_filter_panel(self):
dates = pd.date_range('1/1/2000', periods=2, freq='B', tz='UTC')
df = pd.Panel(np.random.randn(2, 2, 2),
major_axis=dates,
items=[4, 5],
minor_axis=['price', 'volume'])
# should be filtered
df.loc[4, dates[0], 'price'] = np.nan
# should not be filtered, should have been ffilled
df.loc[5, dates[1], 'price'] = np.nan
source = DataPanelSource(df)
event = next(source)
self.assertEqual(5, event.sid)
event = next(source)
self.assertEqual(4, event.sid)
self.assertRaises(StopIteration, next, source)
def setUp(self):
self.env = TradingEnvironment()
self.days = self.env.trading_days[:5]
self.panel = pd.Panel({1: pd.DataFrame({
'price': [1, 1, 2, 4, 8], 'volume': [1e9, 1e9, 1e9, 1e9, 0],
'type': [DATASOURCE_TYPE.TRADE,
DATASOURCE_TYPE.TRADE,
DATASOURCE_TYPE.TRADE,
DATASOURCE_TYPE.TRADE,
DATASOURCE_TYPE.CLOSE_POSITION]},
index=self.days)
})
self.no_close_panel = pd.Panel({1: pd.DataFrame({
'price': [1, 1, 2, 4, 8], 'volume': [1e9, 1e9, 1e9, 1e9, 1e9],
'type': [DATASOURCE_TYPE.TRADE,
DATASOURCE_TYPE.TRADE,
DATASOURCE_TYPE.TRADE,
DATASOURCE_TYPE.TRADE,
DATASOURCE_TYPE.TRADE]},
index=self.days)
})
def request_prices(self, current_date, symbols):
"""Implementation of abstract base class method."""
# Reset the bar object for the latest assets requested.
self.bar = pd.Panel(
items=[PriceFields.current_price.value], major_axis=[current_date],
minor_axis=symbols
)
# Issue requests to Interactive Brokers for the latest price data of
# each asset in the list of bars.
for i, s in enumerate(symbols):
c = self.create_contract(s)
self.conn.reqMktData(i, c, "", True)
# Wait a moment.
sleep(0.5)
return self.bar
def init_class_fixtures(cls):
super(WithPanelBarReader, cls).init_class_fixtures()
finder = cls.asset_finder
trading_calendar = get_calendar('NYSE')
items = finder.retrieve_all(finder.sids)
major_axis = (
trading_calendar.sessions_in_range if cls.FREQUENCY == 'daily'
else trading_calendar.minutes_for_sessions_in_range
)(cls.START_DATE, cls.END_DATE)
minor_axis = ['open', 'high', 'low', 'close', 'volume']
shape = tuple(map(len, [items, major_axis, minor_axis]))
raw_data = np.arange(shape[0] * shape[1] * shape[2]).reshape(shape)
cls.panel = pd.Panel(
raw_data,
items=items,
major_axis=major_axis,
minor_axis=minor_axis,
)
cls.reader = PanelBarReader(trading_calendar, cls.panel, cls.FREQUENCY)
def test_duplicate_values(self):
UNIMPORTANT_VALUE = 57
panel = pd.Panel(
UNIMPORTANT_VALUE,
items=['a', 'b', 'b', 'a'],
major_axis=['c'],
minor_axis=['d'],
)
unused = ExplodingObject()
axis_names = ['items', 'major_axis', 'minor_axis']
for axis_order in permutations((0, 1, 2)):
transposed = panel.transpose(*axis_order)
with self.assertRaises(ValueError) as e:
PanelBarReader(unused, transposed, 'daily')
expected = (
"Duplicate entries in Panel.{name}: ['a', 'b'].".format(
name=axis_names[axis_order.index(0)],
)
)
self.assertEqual(str(e.exception), expected)
def swaplevel(self, i, j, axis=0):
"""
Swap levels i and j in a MultiIndex on a particular axis
Parameters
----------
i, j : int, string (can be mixed)
Level of index to be swapped. Can pass level name as string.
Returns
-------
swapped : type of caller (new object)
"""
axis = self._get_axis_number(axis)
result = self.copy()
labels = result._data.axes[axis]
result._data.set_axis(axis, labels.swaplevel(i, j))
return result
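A brief usage sketch of swaplevel on a MultiIndexed DataFrame (the index names here are illustrative):

import pandas as pd

idx = pd.MultiIndex.from_tuples([('a', 1), ('a', 2), ('b', 1)],
                                names=['outer', 'inner'])
df = pd.DataFrame({'x': [10, 20, 30]}, index=idx)
swapped = df.swaplevel('outer', 'inner', axis=0)  # levels by name or position
print(swapped.index.names)  # ['inner', 'outer']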
# ----------------------------------------------------------------------
# Rename
# TODO: define separate funcs for DataFrame, Series and Panel so you can
# get completion on keyword arguments.
def get(self, key, default=None):
"""
Get item from object for given key (DataFrame column, Panel slice,
etc.). Returns default value if not found.
Parameters
----------
key : object
Returns
-------
value : type of items contained in object
"""
try:
return self[key]
except (KeyError, ValueError, IndexError):
return default
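A quick illustration: like dict.get, a missing key returns the default rather than raising:

import pandas as pd

df = pd.DataFrame({'price': [1.0, 2.0]})
print(df.get('price'))        # the 'price' column, as a Series
print(df.get('volume', 0.0))  # missing key -> 0.0, no KeyError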
def test_resample_panel(self):
rng = date_range('1/1/2000', '6/30/2000')
n = len(rng)
panel = Panel(np.random.randn(3, n, 5),
items=['one', 'two', 'three'],
major_axis=rng,
minor_axis=['a', 'b', 'c', 'd', 'e'])
result = panel.resample('M', axis=1).mean()
def p_apply(panel, f):
result = {}
for item in panel.items:
result[item] = f(panel[item])
return Panel(result, items=panel.items)
expected = p_apply(panel, lambda x: x.resample('M').mean())
tm.assert_panel_equal(result, expected)
panel2 = panel.swapaxes(1, 2)
result = panel2.resample('M', axis=2).mean()
expected = p_apply(panel2, lambda x: x.resample('M', axis=1).mean())
tm.assert_panel_equal(result, expected)
def test_resample_panel_numpy(self):
rng = date_range('1/1/2000', '6/30/2000')
n = len(rng)
panel = Panel(np.random.randn(3, n, 5),
items=['one', 'two', 'three'],
major_axis=rng,
minor_axis=['a', 'b', 'c', 'd', 'e'])
result = panel.resample('M', axis=1).apply(lambda x: x.mean(1))
expected = panel.resample('M', axis=1).mean()
tm.assert_panel_equal(result, expected)
panel = panel.swapaxes(1, 2)
result = panel.resample('M', axis=2).apply(lambda x: x.mean(2))
expected = panel.resample('M', axis=2).mean()
tm.assert_panel_equal(result, expected)
def test_panel_aggregation(self):
ind = pd.date_range('1/1/2000', periods=100)
data = np.random.randn(2, len(ind), 4)
wp = pd.Panel(data, items=['Item1', 'Item2'], major_axis=ind,
minor_axis=['A', 'B', 'C', 'D'])
tg = TimeGrouper('M', axis=1)
_, grouper, _ = tg._get_grouper(wp)
bingrouped = wp.groupby(grouper)
binagg = bingrouped.mean()
def f(x):
assert (isinstance(x, Panel))
return x.mean(1)
result = bingrouped.agg(f)
tm.assert_panel_equal(result, binagg)
def test_binary_ops_docs(self):
from pandas import DataFrame, Panel
op_map = {'add': '+',
'sub': '-',
'mul': '*',
'mod': '%',
'pow': '**',
'truediv': '/',
'floordiv': '//'}
for op_name in ['add', 'sub', 'mul', 'mod', 'pow', 'truediv',
'floordiv']:
for klass in [Series, DataFrame, Panel]:
operand1 = klass.__name__.lower()
operand2 = 'other'
op = op_map[op_name]
expected_str = ' '.join([operand1, op, operand2])
self.assertTrue(expected_str in getattr(klass,
op_name).__doc__)
# reverse version of the binary ops
expected_str = ' '.join([operand2, op, operand1])
self.assertTrue(expected_str in getattr(klass, 'r' +
op_name).__doc__)
def test_to_panel_expanddim(self):
# GH 9762
class SubclassedFrame(DataFrame):
@property
def _constructor_expanddim(self):
return SubclassedPanel
class SubclassedPanel(Panel):
pass
index = MultiIndex.from_tuples([(0, 0), (0, 1), (0, 2)])
df = SubclassedFrame({'X': [1, 2, 3], 'Y': [4, 5, 6]}, index=index)
result = df.to_panel()
self.assertTrue(isinstance(result, SubclassedPanel))
expected = SubclassedPanel([[[1, 2, 3]], [[4, 5, 6]]],
items=['X', 'Y'], major_axis=[0],
minor_axis=[0, 1, 2],
dtype='int64')
tm.assert_panel_equal(result, expected)
def getmtm(cf, alpha=0.95):
"""
Calcule la MtM (i.e. : les cash-flows moyens réalisés et un intervalle
de confiance) de chaque actif listé dans `cf`.
Paramètres
----------
cf : pandas.Panel
Cash-flows réalisés pour chaque simulation (`items`), chaque
actif (`major_axis`) et chaque date (`minor_axis`).
alpha : double compris entre 0. et 1.
Quantile à utiliser pour le calcul des intervalles de confiances.
"""
nsims = cf.shape[0]
cumvalues = cf.sum(axis=2)
mean = cumvalues.mean(axis=1)
std = cumvalues.std(axis=1)
res = {}
for key, val in mean.items():
res[key] = mkmcresults(val, std[key], nsims)
return res
def backtest(config_file, day_trade):
cfg = config.Config(config_file)
cfg.day_trade = day_trade
dfs = load_data(config_file)
trender = strategies[cfg.strategy](**cfg.strategy_parameters)
res = []
for df in dfs:
res.append(trender.backtest(data_frame=df))
final_panel = pd.Panel({os.path.basename(p['path']): df for p, df in
zip(cfg.data_path, res)})
profit_series = final_panel.sum(axis=0)['total_profit'].cumsum()
final_panel.to_excel(cfg.output_file)
if cfg.show:
profit_series.plot()
plt.xlabel('Time')
plt.ylabel('Profit')
plt.legend('Profit')
plt.show()
def trade_summary_all(self):
dct = OrderedDict()
panel = pd.Panel(self.trade_summary).swapaxes(0, 1)
for field in panel.keys():
if field.startswith(u"?"):
dct[field] = panel[field].apply(np.sum, axis=0)
for field in [u"??????", u"??????"]:
dct[field] = panel[field].apply(np.max, axis=0)
dct[u"??????"] = dct[u"???"] / dct[u"?????"]
dct[u"??????"] = dct[u"???"] / dct[u"?????"]
dct[u"????????"] = dct[u"?????"] / dct[u"?????"]
dct[u"????????"] = dct[u"?????"] / dct[u"?????"]
dct[u"??????"] = (dct[u"?????"] / dct[u"?????"]).astype(str)
dct[u"?????"] = dct[u"?????"].astype(str)
dct[u"??"] = dct[u"?????"] / dct[u"?????"]
orders = self.order_details
start = orders["??????"].iloc[0]
end = orders["??????"].iloc[-1]
dct[u"????"] = [_workdays(start, end), np.nan, np.nan]
result = pd.DataFrame(data=dct).T
return result
def history(self, symbol=None, frequency=None, fields=None, start=None, end=None, length=None, db=None):
if frequency is None:
frequency = self.frequency
try:
if symbol is None:
symbol = list(self._panels[frequency].items)
result = self._read_panel(symbol, frequency, fields, start, end, length)
if self.match(result, symbol, length):
return result
else:
raise KeyError()
except KeyError:
if symbol is None:
                symbol = list(self._panels[frequency].items)
if end is None:
end = self.time
result = self._read_db(symbol, frequency, fields, start, end, length, db if db else self._db)
if isinstance(result, pd.Panel) and len(result.minor_axis) == 1:
return result.iloc[:, :, 0]
else:
return result
def match(result, items, length):
    if not length:
        return True
    if isinstance(result, (pd.DataFrame, pd.Series)):
        return len(result) == length
    elif isinstance(result, pd.Panel):
        return (len(items) == len(result.items)) and (len(result.major_axis) == length)
    else:
        return False
def __init__(self, panel, context=None, side="L", frequency='D'):
"""
Create a PannelDataSupport with a pandas.Panel object.
Panel's inner data can be accessed using method history() and current()
context is a optional parameters, to
Args:
panel(pandas.Panel): Panel where real data stored in
context: default end bar number refer to context.real_bar_num
side(str): "L" or "R", "L" means bar's datetime refer to it's start time
"R" means bar's datetime refer to it's end time
"""
super(PanelDataSupport, self).__init__()
self._panel = panel
self._frequency = frequency
self._others = {}
self._date_index = self._panel.iloc[0].index
self._side = side
self._context = context
def reshape(data):
if isinstance(data, pd.DataFrame):
if len(data) == 1:
return data.iloc[0]
elif len(data.columns) == 1:
return data.iloc[:, 0]
else:
return data
elif isinstance(data, pd.Panel):
if len(data.major_axis) == 1:
return data.iloc[:, 0, :]
elif len(data.minor_axis) == 1:
return data.iloc[:, :, 0]
else:
return data
else:
return data
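To illustrate the squeezing behavior, a short sketch assuming the reshape helper above is in scope:

import pandas as pd

df = pd.DataFrame({'price': [1.0, 2.0, 3.0]})
print(reshape(df))           # one column -> squeezed to a Series of length 3
print(reshape(df.iloc[:1]))  # one row -> squeezed to a Series of its values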
def getHisdatPanl(codes, days):
"""k?????????
codes: [list]
days: [turple]
return: [pandas.panel]"""
def gen():
start_day , end_day = days
d = {}
for code in codes:
df = getHisdatDf(code, start_day, end_day)
d[code] = df
panel = pd.Panel(d)
return panel
panel = agl.SerialMgr.serialAuto(gen)
if panel is None:
panel = gen()
return panel
def save_panel(self):
"""
Take all supplied data and create the final pandas Panel
:return: pandas Panel
"""
assert 0 not in self.dimensions
assert self.data_dict != {}
if self.dict_key == 'time':
assert len(self.data_dict) == self.dimensions[1]
        panel = pd.Panel(self.data_dict, items=self.time_series, major_axis=self.entities,
                         minor_axis=self.variables).transpose(1, 0, 2)  # put entities into items
    elif self.dict_key == 'entity':
        assert len(self.data_dict) == self.dimensions[0]
        panel = pd.Panel(self.data_dict, items=self.entities, major_axis=self.time_series,
                         minor_axis=self.variables)
    else:
        # not a dict, but a 3D np array
        panel = pd.Panel(self.data_dict, items=self.entities, major_axis=self.time_series,
                         minor_axis=self.variables)
print(panel)
self.panel = panel
return panel
def test_numpy_3d():
n, t, k = 11, 7, 3
x = np.random.random((k, t, n))
dh = PanelData(x)
assert_equal(x, dh.values3d)
assert dh.nentity == n
assert dh.nobs == t
assert dh.nvar == k
assert_equal(np.reshape(x.T, (n * t, k)), dh.values2d)
items = ['entity.{0}'.format(i) for i in range(n)]
obs = [i for i in range(t)]
var_names = ['x.{0}'.format(i) for i in range(k)]
expected = pd.Panel(np.reshape(x, (k, t, n)), items=var_names,
major_axis=obs, minor_axis=items)
expected_frame = expected.swapaxes(1, 2).to_frame()
expected_frame.index.levels[0].name = 'entity'
expected_frame.index.levels[1].name = 'time'
assert_frame_equal(dh.dataframe, expected_frame)
def test_categorical_conversion():
t, n = 3, 1000
string = np.random.choice(['a', 'b', 'c'], (t, n))
num = np.random.randn(t, n)
p = pd.Panel({'a': string, 'b': num})
p = p[['a', 'b']]
panel = PanelData(p, convert_dummies=False)
df = panel.dataframe.copy()
df['a'] = pd.Categorical(df['a'])
panel = PanelData(df, convert_dummies=True)
df = panel.dataframe
assert df.shape == (3000, 3)
s = string.T.ravel()
a_locs = np.where(s == 'a')
b_locs = np.where(s == 'b')
c_locs = np.where(s == 'c')
assert np.all(df.loc[:, 'a.b'].values[a_locs] == 0.0)
assert np.all(df.loc[:, 'a.b'].values[b_locs] == 1.0)
assert np.all(df.loc[:, 'a.b'].values[c_locs] == 0.0)
assert np.all(df.loc[:, 'a.c'].values[a_locs] == 0.0)
assert np.all(df.loc[:, 'a.c'].values[b_locs] == 0.0)
assert np.all(df.loc[:, 'a.c'].values[c_locs] == 1.0)
def test_string_conversion():
t, n = 3, 1000
string = np.random.choice(['a', 'b', 'c'], (t, n))
num = np.random.randn(t, n)
p = pd.Panel({'a': string, 'b': num})
p = p[['a', 'b']]
panel = PanelData(p, var_name='OtherEffect')
df = panel.dataframe
assert df.shape == (3000, 3)
s = string.T.ravel()
a_locs = np.where(s == 'a')
b_locs = np.where(s == 'b')
c_locs = np.where(s == 'c')
assert np.all(df.loc[:, 'a.b'].values[a_locs] == 0.0)
assert np.all(df.loc[:, 'a.b'].values[b_locs] == 1.0)
assert np.all(df.loc[:, 'a.b'].values[c_locs] == 0.0)
assert np.all(df.loc[:, 'a.c'].values[a_locs] == 0.0)
assert np.all(df.loc[:, 'a.c'].values[b_locs] == 0.0)
assert np.all(df.loc[:, 'a.c'].values[c_locs] == 1.0)
def test_incorrect_time_axis():
x = np.random.randn(3, 3, 1000)
entities = ['entity.{0}'.format(i) for i in range(1000)]
time = ['time.{0}'.format(i) for i in range(3)]
var_names = ['var.{0}'.format(i) for i in range(3)]
p = pd.Panel(x, items=var_names, major_axis=time, minor_axis=entities)
with pytest.raises(ValueError):
PanelData(p)
df = p.swapaxes(1, 2).swapaxes(0, 1).to_frame()
with pytest.raises(ValueError):
PanelData(df)
time = [1, pd.datetime(1960, 1, 1), 'a']
var_names = ['var.{0}'.format(i) for i in range(3)]
p = pd.Panel(x, items=var_names, major_axis=time, minor_axis=entities)
with pytest.raises(ValueError):
PanelData(p)
df = p.swapaxes(1, 2).swapaxes(0, 1).to_frame()
with pytest.raises(ValueError):
PanelData(df)
def test_first_difference_errors(data):
if isinstance(data.x, pd.Panel):
x = data.x.iloc[:, [0], :]
y = data.y.iloc[[0], :]
else:
x = data.x[:, [0], :]
y = data.y[[0], :]
with pytest.raises(ValueError):
FirstDifferenceOLS(y, x)
if not isinstance(data.x, pd.Panel):
return
x = data.x.copy()
x['Intercept'] = 1.0
with pytest.raises(ValueError):
FirstDifferenceOLS(data.y, x)
def first_difference(self):
"""
Compute first differences of variables
Returns
-------
diffs : PanelData
Differenced values
"""
diffs = self.panel.values
diffs = diffs[:, 1:] - diffs[:, :-1]
diffs = Panel(diffs, items=self.panel.items,
major_axis=self.panel.major_axis[1:],
minor_axis=self.panel.minor_axis)
diffs = diffs.swapaxes(1, 2).to_frame(filter_observations=False)
diffs = diffs.reindex(self._frame.index).dropna(how='any')
return PanelData(diffs)
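The core step above is just a difference along the time axis; a plain numpy sketch of the same arithmetic:

import numpy as np

vals = np.arange(24, dtype=float).reshape(2, 4, 3)  # (items, time, entities)
diffs = vals[:, 1:] - vals[:, :-1]                  # first difference over time
print(diffs.shape)  # (2, 3, 3): one fewer observation per entity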
def mass_contaminant_consumed(node_results):
""" Mass of contaminant consumed, equation from [1].
Parameters
----------
node_results : pd.Panel
A pandas Panel containing node results.
Items axis = attributes, Major axis = times, Minor axis = node names
    Mass of contaminant consumed uses the 'demand' and 'quality' attributes.
References
----------
[1] EPA, U. S. (2015). Water security toolkit user manual version 1.3.
Technical report, U.S. Environmental Protection Agency
"""
maskD = np.greater(node_results['demand'], 0) # positive demand
deltaT = node_results['quality'].index[1] # this assumes constant timedelta
MC = node_results['demand']*deltaT*node_results['quality']*maskD # m3/s * s * kg/m3 - > kg
return MC
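A toy sketch of calling it, with hypothetical values; the index holds timestamps in seconds so that index[1] equals the (constant) timestep:

import pandas as pd

times = [0, 3600, 7200]                                  # seconds
nodes = ['J1', 'J2']
demand = pd.DataFrame(0.01, index=times, columns=nodes)  # m3/s
quality = pd.DataFrame(0.5, index=times, columns=nodes)  # kg/m3
results = pd.Panel({'demand': demand, 'quality': quality})
print(mass_contaminant_consumed(results))                # kg per time and node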
def volume_contaminant_consumed(node_results, detection_limit):
""" Volume of contaminant consumed, equation from [1].
Parameters
----------
node_results : pd.Panel
A pandas Panel containing node results.
Items axis = attributes, Major axis = times, Minor axis = node names
    Volume of contaminant consumed uses the 'demand' and 'quality' attributes.
detection_limit : float
Contaminant detection limit
References
----------
[1] EPA, U. S. (2015). Water security toolkit user manual version 1.3.
Technical report, U.S. Environmental Protection Agency
"""
maskQ = np.greater(node_results['quality'], detection_limit)
maskD = np.greater(node_results['demand'], 0) # positive demand
deltaT = node_results['quality'].index[1] # this assumes constant timedelta
VC = node_results['demand']*deltaT*maskQ*maskD # m3/s * s * bool - > m3
return VC
def setup_ep_results(self, times, nodes, links, result_types=None):
"""Set up the results object (or file, etc.) for save_ep_line() calls to use.
The basic implementation sets up a dictionary of pandas DataFrames with the keys
    being member names of the ResultsType class. If the items parameter is left blank,
    the function will use the items that were specified during object creation.
    If this, too, was blank, then all results parameters will be saved.
"""
if result_types is None:
result_types = self.items
link_items = [ member.name for member in result_types if member.is_link ]
node_items = [ member.name for member in result_types if member.is_node ]
self.results.node = pd.Panel(items=node_items, major_axis=times, minor_axis=nodes)
self.results.link = pd.Panel(items=link_items, major_axis=times, minor_axis=links)
self.results.time = times
self.results.network_name = self.inp_file
def parse(self, entry):
    data = pd.read_csv(str(entry),
                       engine="c",
                       sep="\t",
                       index_col=0,
                       parse_dates=True,
                       infer_datetime_format=True)
    if data.index.name is not None:
        data.index.name = data.index.name.lower()
data.columns = list(range(24)) * 3
paneldata = pd.Panel({
"above": data.iloc[:, 0:24],
"all": data.iloc[:, 24:48],
"percent": data.iloc[:, 48:72]
})
paneldata.minor_axis.name = "hour"
return paneldata
def digest_bars(self, history_spec, do_ffill):
"""
Get the last (history_spec.bar_count - 1) bars from self.digest_panel
for the requested HistorySpec.
"""
bar_count = history_spec.bar_count
if bar_count == 1:
# slicing with [1 - bar_count:] doesn't work when bar_count == 1,
# so special-casing this.
res = pd.DataFrame(index=[], columns=self.sids, dtype=float)
return res.values, res.index
field = history_spec.field
# Panel axes are (field, dates, sids). We want just the entries for
# the requested field, the last (bar_count - 1) data points, and all
# sids.
digest_panel = self.digest_panels[history_spec.frequency]
frame = digest_panel.get_current(field, raw=True)
if do_ffill:
# Do forward-filling *before* truncating down to the requested
# number of bars. This protects us from losing data if an illiquid
# stock has a gap in its price history.
        filled = ffill_digest_frame_from_prior_values(
            history_spec.frequency,
            history_spec.field,
            frame,
            self.last_known_prior_values,
            raw=True
        )
        # Truncate only after we've forward-filled.
indexer = slice(1 - bar_count, None)
return filled[indexer], digest_panel.current_dates()[indexer]
else:
indexer = slice(1 - bar_count, None)
return frame[indexer, :], digest_panel.current_dates()[indexer]
def buffer_panel_minutes(self,
buffer_panel,
earliest_minute=None,
latest_minute=None,
raw=False):
"""
Get the minutes in @buffer_panel between @earliest_minute and
@latest_minute, inclusive.
@buffer_panel can be a RollingPanel or a plain Panel. If a
RollingPanel is supplied, we call `get_current` to extract a Panel
object.
    If no value is specified for @earliest_minute, use all the minutes we
    have up until @latest_minute.
If no value for @latest_minute is specified, use all values up until
the latest minute.
"""
if isinstance(buffer_panel, RollingPanel):
buffer_panel = buffer_panel.get_current(start=earliest_minute,
end=latest_minute,
raw=raw)
return buffer_panel
# Using .ix here rather than .loc because loc requires that the keys
# are actually in the index, whereas .ix returns all the values between
# earliest_minute and latest_minute, which is what we want.
return buffer_panel.ix[:, earliest_minute:latest_minute, :]
def _create_buffer(self):
panel = pd.Panel(
items=self.items,
minor_axis=self.minor_axis,
major_axis=range(self.cap),
dtype=self.dtype,
)
return panel
def get_current(self):
"""
Get a Panel that is the current data in view. It is not safe to persist
these objects because internal data might change
"""
where = slice(self._oldest_frame_idx(), self._pos)
major_axis = pd.DatetimeIndex(deepcopy(self.date_buf[where]), tz='utc')
return pd.Panel(self.buffer.values[:, where, :], self.items,
major_axis, self.minor_axis, dtype=self.dtype)
def make_trade_panel_for_asset_info(dates,
asset_info,
price_start,
price_step_by_date,
price_step_by_sid,
volume_start,
volume_step_by_date,
volume_step_by_sid):
"""
locations where assets did not exist.
"""
sids = list(asset_info.index)
price_sid_deltas = np.arange(len(sids), dtype=float) * price_step_by_sid
price_date_deltas = np.arange(len(dates), dtype=float) * price_step_by_date
prices = (price_sid_deltas + price_date_deltas[:, None]) + price_start
volume_sid_deltas = np.arange(len(sids)) * volume_step_by_sid
volume_date_deltas = np.arange(len(dates)) * volume_step_by_date
volumes = (volume_sid_deltas + volume_date_deltas[:, None]) + volume_start
for j, sid in enumerate(sids):
start_date, end_date = asset_info.loc[sid, ['start_date', 'end_date']]
        # Normalize here so that we still generate non-NaN values on the
        # minutes for an asset's last trading day.
for i, date in enumerate(dates.normalize()):
if not (start_date <= date <= end_date):
prices[i, j] = np.nan
volumes[i, j] = 0
# Legacy panel sources use a flipped convention from what we return
# elsewhere.
return pd.Panel(
{
'price': prices,
'volume': volumes,
},
major_axis=dates,
minor_axis=sids,
).transpose(2, 1, 0)
def test_basics(self, window=10):
items = ['bar', 'baz', 'foo']
minor = ['A', 'B', 'C', 'D']
rp = MutableIndexRollingPanel(window, items, minor, cap_multiple=2)
dates = pd.date_range('2000-01-01', periods=30, tz='utc')
major_deque = deque(maxlen=window)
frames = {}
for i, date in enumerate(dates):
frame = pd.DataFrame(np.random.randn(3, 4), index=items,
columns=minor)
rp.add_frame(date, frame)
frames[date] = frame
major_deque.append(date)
result = rp.get_current()
expected = pd.Panel(frames, items=list(major_deque),
major_axis=items, minor_axis=minor)
tm.assert_panel_equal(result, expected.swapaxes(0, 1))
def test_close_position_event(self):
pt = perf.PositionTracker(asset_finder=self.env.asset_finder)
dt = pd.Timestamp("1984/03/06 3:00PM")
pos1 = perf.Position(1, amount=np.float64(120.0),
last_sale_date=dt, last_sale_price=3.4)
pos2 = perf.Position(2, amount=np.float64(-100.0),
last_sale_date=dt, last_sale_price=3.4)
pt.update_positions({1: pos1, 2: pos2})
event_type = DATASOURCE_TYPE.CLOSE_POSITION
index = [dt + timedelta(days=1)]
pan = pd.Panel({1: pd.DataFrame({'price': 1, 'volume': 0,
'type': event_type}, index=index),
2: pd.DataFrame({'price': 1, 'volume': 0,
'type': event_type}, index=index),
3: pd.DataFrame({'price': 1, 'volume': 0,
'type': event_type}, index=index)})
source = DataPanelSource(pan)
for i, event in enumerate(source):
txn = pt.maybe_create_close_position_transaction(event)
if event.sid == 1:
# Test owned long
self.assertEqual(-120, txn.amount)
elif event.sid == 2:
# Test owned short
self.assertEqual(100, txn.amount)
elif event.sid == 3:
# Test not-owned SID
self.assertIsNone(txn)
def setUp(self):
self.env = TradingEnvironment()
self.days = self.env.trading_days[:4]
self.panel = pd.Panel({1: pd.DataFrame({
'price': [1, 1, 2, 4], 'volume': [1e9, 1e9, 1e9, 0],
'type': [DATASOURCE_TYPE.TRADE,
DATASOURCE_TYPE.TRADE,
DATASOURCE_TYPE.TRADE,
DATASOURCE_TYPE.CLOSE_POSITION]},
index=self.days)
})
def readGraceData(filename, lat_name, lon_name, data_name, time=None):
'''
This function reads in netcdf data provided by GRACE Tellus
@param filename: Name of file to read in
@param lat_name: Name of latitude data
@param lon_name: Name of longitude data
@param data_name: Name of data product
@param time: Name of time data
'''
nc = Dataset(filename, 'r')
lat_index = nc[lat_name][:]
lon_index = nc[lon_name][:]
data = nc[data_name][:]
    if time is not None:
        time = nc.variables[time]
        date_index = pd.to_datetime(num2date(time[:], units=time.units, calendar=time.calendar))
        return pd.Panel(data=data, items=date_index, major_axis=lat_index, minor_axis=lon_index)
    else:
        return pd.DataFrame(data=data, columns=lon_index, index=lat_index)
def __init__(self):
"""Initialize parameters of the Interactive Brokers price handler
object.
"""
super(InteractiveBrokersPriceHandler, self).__init__()
self.conn = ibConnection(
clientId=IB.data_handler_id.value, port=IB.port.value
)
self.conn.register(self.__tick_price_handler, message.tickPrice)
if not self.conn.connect():
raise ValueError(
"Odin was unable to connect to the Trader Workstation."
)
# Set the target field to download data from.
today = dt.datetime.today()
open_t, close_t = dt.time(9, 30), dt.time(16)
cur_t = today.time()
# If today is a weekday and the timing is correct, then we use the most
# recently observed price. Otherwise we use the close price.
    if today.weekday() < 5 and open_t <= cur_t <= close_t:
self.field = TickType.LAST
else:
self.field = TickType.CLOSE
# Initialize a pandas panel to store the price data.
self.bar = pd.Panel(items=[PriceFields.current_price.value])
def verify_indices_all_unique(obj):
"""
Check that all axes of a pandas object are unique.
Parameters
----------
obj : pd.Series / pd.DataFrame / pd.Panel
The object to validate.
Returns
-------
obj : pd.Series / pd.DataFrame / pd.Panel
The validated object, unchanged.
Raises
------
ValueError
If any axis has duplicate entries.
"""
axis_names = [
('index',), # Series
('index', 'columns'), # DataFrame
('items', 'major_axis', 'minor_axis') # Panel
    ][obj.ndim - 1]  # ndim == 1 maps to entry 0, ndim == 3 to entry 2.
for axis_name, index in zip(axis_names, obj.axes):
if index.is_unique:
continue
raise ValueError(
"Duplicate entries in {type}.{axis}: {dupes}.".format(
type=type(obj).__name__,
axis=axis_name,
dupes=sorted(index[index.duplicated()]),
)
)
return obj
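A short sketch of the failure path (the duplicate column name is illustrative):

import pandas as pd

df = pd.DataFrame([[1, 2], [3, 4]], columns=['a', 'a'])
try:
    verify_indices_all_unique(df)
except ValueError as exc:
    print(exc)  # Duplicate entries in DataFrame.columns: ['a'].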
def get_peak_info_panel(self):
pn = pd.Panel(OrderedDict([
('Peak Size ({})'.format(self.get_peak_size_units()), self.get_peak_size()),
('Peak Center ({})'.format(self.x_units), self.get_peak_center()),
('FWHM ({})'.format(self.x_units), self.get_peak_fwhm_absolute()),
('FWHM (ratio)', self.get_peak_fwhm_relative()),
]))
pn = pn.swapaxes('items', 'major')
return pn
def stderrs(self):
"""The standard errors of the parameter estimates."""
return DataFrame(self._get('bse'), index=self._result_idx,
columns=self.exog.columns)
# 3d data (return type is a MultiIndex pd.DataFrame)
# Note that pd.Panel was deprecated in 0.20.1
# For models with >1 exogenous variable, these properties consist of an
# nxm vector for each rolling period.
# The "outer" index will be _result_idx (period-ending basis), with the
# inner indices being the individual periods within each outer period.
# --------------------------------------------------------------------------
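Since pd.Panel is deprecated, here is a sketch of the replacement pattern the note above refers to: 3-D data stored as a DataFrame over a two-level MultiIndex:

import numpy as np
import pandas as pd

# 3 periods x 2 entities, 2 variables, in long form.
idx = pd.MultiIndex.from_product([range(3), ['e1', 'e2']],
                                 names=['time', 'entity'])
df = pd.DataFrame(np.random.randn(6, 2), index=idx, columns=['x0', 'x1'])
print(df.xs('e1', level='entity'))  # recover a 2-D (time x variable) slice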
def keys(self):
"""Get the 'info axis' (see Indexing for more)
This is index for Series, columns for DataFrame and major_axis for
Panel.
"""
return self._info_axis
def iteritems(self):
"""Iterate over (label, values) on info axis
This is index for Series, columns for DataFrame, major_axis for Panel,
and so on.
"""
for h in self._info_axis:
yield h, self[h]
# originally used to get around 2to3's changes to iteritems.
# Now unnecessary. Sidenote: don't want to deprecate this for a while,
# otherwise libraries that use 2to3 will have issues.