Python pandas 模块:read_msgpack() 实例源码
我们从 Python 开源项目中提取了以下 5 个代码示例,用于说明如何使用 pandas.read_msgpack()。
def __init__(self,
             path=None,
             lock=None,
             clean_on_failure=True,
             serialization='msgpack'):
    """Set up the storage directory, the lock and the (de)serializers.

    Parameters
    ----------
    path : optional
        Directory to use. When ``None`` a fresh one is obtained from
        ``mkdtemp()``.
    lock : optional
        Stored as ``self.lock``; ``nop_context`` is used when ``None``.
    clean_on_failure : optional
        Stored unchanged as ``self.clean_on_failure``.
    serialization : str, optional
        Either ``'msgpack'`` or ``'pickle'``, the latter optionally followed
        by ``':n'`` where ``n`` is parsed with ``int`` and kept as
        ``self._protocol``.

    Raises
    ------
    ValueError
        If ``serialization`` is neither ``'msgpack'`` nor ``'pickle[:n]'``
        (``int`` also raises it when ``n`` is not an integer literal).
    """
    if path is None:
        path = mkdtemp()
    self.path = path

    if lock is None:
        lock = nop_context
    self.lock = lock

    self.clean_on_failure = clean_on_failure

    if serialization != 'msgpack':
        # Expected form is 'pickle' or 'pickle:<protocol>'.
        kind, colon, protocol = serialization.partition(':')
        if kind != 'pickle':
            raise ValueError(
                "'serialization' must be either 'msgpack' or 'pickle[:n]'",
            )
        # No colon at all means "no explicit protocol".
        self._protocol = int(protocol) if colon else None
        self.serialize = self._serialize_pickle
        self.deserialize = pickle.load
    else:
        self.serialize = pd.DataFrame.to_msgpack
        self.deserialize = pd.read_msgpack
        self._protocol = None

    ensure_directory(self.path)
def df_from_bytes_msgpack_(bytes_: bytes) -> pd.DataFrame:
    """Decode msgpack bytes with ``pd.read_msgpack`` and return the DataFrame.

    Raises:
        DataFrameLoadException: if decoding raises ``UnicodeDecodeError`` or
            the decoded object is not a ``pd.DataFrame``.
    """
    stream = BytesIO(bytes_)
    try:
        decoded = pd.read_msgpack(stream)
    except UnicodeDecodeError:
        raise DataFrameLoadException("Not a DataFrame")

    if isinstance(decoded, pd.DataFrame):
        return decoded
    raise DataFrameLoadException("Not a DataFrame")
def load_bytes(self, bytestring, data_source=''):
    """Deserialize ``bytestring`` and register every series found in it.

    The loaders are tried in order (``msgpack_lz4_to_series``,
    ``pd.read_msgpack``, ``pickle.loads``). The first loader that returns
    without raising decides the outcome: a Series is used as-is, a DataFrame
    is split into its columns, a list is taken as the list of series, and
    any other object is logged as an error and nothing is loaded.

    Every series whose name is falsy is named ``<prefix>_<index>``, where the
    prefix is the file-name part of ``data_source`` when that path exists and
    ``data_source`` itself otherwise. Each series is then passed to
    ``self.model.add_dataitem``.

    Args:
        bytestring: Serialized payload to decode.
        data_source: Label or file path describing where the bytes came
            from; used for default names and log messages.

    Returns:
        None.
    """
    load_methods = [
        msgpack_lz4_to_series,
        pd.read_msgpack,
        # NOTE(review): pickle.loads can execute arbitrary code; only pass
        # bytes from a trusted source to this method.
        pickle.loads,
    ]
    seria = None
    for loader in load_methods:
        try:
            loaded = loader(bytestring)
        except Exception:
            # This loader does not understand the payload; try the next one.
            continue
        if isinstance(loaded, pd.Series):
            seria = [loaded]
        elif isinstance(loaded, pd.DataFrame):
            seria = [column for _, column in loaded.iteritems()]
        elif isinstance(loaded, list):
            seria = loaded
        else:
            # Report the decoded object itself (truncated); formatting the
            # still-unset ``seria`` with a precision spec would raise.
            logger.error('Unexpected object found: {:.30}... (using deserializer {})'
                         ''.format(repr(loaded), loader))
            return
        # Payload recognised: do not run the remaining loaders on it.
        break

    if seria is None:
        logger.error('Could not deserialize contents of {} with any of {}'
                     ''.format(data_source, load_methods))
        return

    # The fallback-name prefix does not depend on the individual series.
    if os.path.exists(data_source):
        prefix = os.path.split(data_source)[1]
    else:
        prefix = data_source

    for idx, series in enumerate(seria):
        if not series.name:
            series.name = '{}_{}'.format(prefix, idx)
        self.model.add_dataitem(series, name=series.name)
        logger.info('Loaded "{n}" ({v} values) from {src}'
                    ''.format(n=series.name, v=len(series), src=data_source))
def df_multi_reader(filename: str, limit: bool=False) -> PandasDF:
    """Read the frame stored under ``filename`` in the configured format.

    ``settings.DATA_TYPE`` selects the reader and the file suffix appended
    to ``filename``. For ``"hdfone"`` a single file under
    ``settings.DATA_PATH`` is read and ``filename`` is used as the key. A
    missing file leaves the result as an empty ``DataFrame``.

    When ``limit`` is true and the frame is non-empty, it is trimmed with
    ``df.last(settings.LIMIT_MONTHS)``.

    Any exception, including a failed argument assertion, is printed in red
    with a "MultiReader" prefix, and whatever frame was built so far is
    returned.
    """
    frame = DataFrame()
    try:
        assert isinstance(filename, str), "filename isn't string %s" % filename
        assert isinstance(limit, bool), "limit isn't bool %s" % limit

        data_type = settings.DATA_TYPE
        if data_type == "pickle":
            source = filename + ".mp"
            if isfile(source):
                frame = read_pickle(source)
        elif data_type == "proto2":
            source = filename + ".pr2"
            if isfile(source):
                frame = read_pickle(source)
        elif data_type == "messagepack":
            source = filename + ".pack"
            if isfile(source):
                frame = read_msgpack(source)
        elif data_type == "json":
            source = filename + ".json"
            if isfile(source):
                frame = read_json(source)
        elif data_type == "feather":
            # TODO: feather doesn't handle indexes
            source = filename + ".fth"
            if isfile(source):
                frame = read_feather(source).reset_index()
        elif data_type == "hdf":
            source = filename + ".hdf"
            if isfile(source):
                frame = read_hdf(source, key=filename)
        elif data_type == "hdfone":
            source = join(settings.DATA_PATH, "hdfone.hdfone")
            if isfile(source):
                frame = read_hdf(source, key=filename, mode='r')

        if limit and len(frame.index) > 0:
            frame = frame.last(settings.LIMIT_MONTHS)
    except Exception as exc:
        print(colored.red("MultiReader {}".format(exc)))
    return frame
def nonasy_df_multi_reader(filename: str, limit: bool=False) -> PandasDF:
    """Read the frame stored under ``filename`` in the configured format.

    ``settings.DATA_TYPE`` selects the reader and the file suffix appended
    to ``filename``. For ``"hdfone"`` a single file under
    ``settings.DATA_PATH`` is read and ``filename`` is used as the key. A
    missing file leaves the result as an empty ``DataFrame``.

    When ``limit`` is true and the frame is non-empty, it is trimmed with
    ``df.last(settings.LIMIT_MONTHS)``. Empty frames are left untouched,
    as in ``df_multi_reader``, so a missing file no longer prints a
    spurious error from ``last()``.

    Any exception, including invalid argument types, is printed in red with
    a "MultiReader" prefix, and whatever frame was built so far is returned.
    """
    df = DataFrame()
    try:
        # Explicit checks rather than ``assert`` so they survive ``python -O``;
        # the printed message is the same as before.
        if not isinstance(filename, str):
            raise TypeError("filename isn't string: %s" % filename)
        if not isinstance(limit, bool):
            raise TypeError("limit isn't bool: %s" % limit)

        data_type = settings.DATA_TYPE
        if data_type == "pickle":
            f = filename + ".mp"
            if isfile(f):
                df = read_pickle(f)
        elif data_type == "proto2":
            f = filename + ".pr2"
            if isfile(f):
                df = read_pickle(f)
        elif data_type == "messagepack":
            f = filename + ".pack"
            if isfile(f):
                df = read_msgpack(f)
        elif data_type == "json":
            f = filename + ".json"
            if isfile(f):
                df = read_json(f)
        elif data_type == "feather":
            # TODO: feather doesn't handle indexes
            f = filename + ".fth"
            if isfile(f):
                df = read_feather(f).reset_index()
        elif data_type == "hdf":
            f = filename + ".hdf"
            if isfile(f):
                df = read_hdf(f, key=filename)
        elif data_type == "hdfone":
            f = join(settings.DATA_PATH, "hdfone.hdfone")
            if isfile(f):
                df = read_hdf(f, key=filename, mode='r')

        # Skip empty frames: their default index is not a DatetimeIndex,
        # which ``last`` rejects.
        if limit and len(df.index) > 0:
            df = df.last(settings.LIMIT_MONTHS)
    except Exception as err:
        print(colored.red("MultiReader {}".format(err)))
    return df