Python pandas 模块,option_context() 实例源码
我们从Python开源项目中,提取了以下42个代码示例,用于说明如何使用pandas.option_context()。
def summary(self):
    """
    Print the summary of the Process model.

    Prints the fitted outcome model(s) (when the ``detail`` option is on)
    followed by either direct+indirect effects (mediation) or conditional
    effects (moderation-only), all formatted at the configured precision.

    :return: None
    """
    # "precision" is a shorthand option key; pandas resolves it to
    # display.precision. NOTE(review): in newer pandas this shorthand can
    # be ambiguous (styler.format.precision also matches) -- verify.
    with pd.option_context("precision", self.options["precision"]):
        full_model = self.outcome_models[self.iv]
        # One fitted model per mediator; .get() yields None for unknown
        # names -- presumably every mediator name is present.
        m_models = [self.outcome_models.get(med_name) for med_name in self.mediators]
        if self.options["detail"]:
            print("\n***************************** OUTCOME MODELS ****************************\n")
            print(full_model)
            print("\n-------------------------------------------------------------------------\n")
            for med_model in m_models:
                print(med_model)
                print("\n-------------------------------------------------------------------------\n")
        if self.indirect_model:
            # Mediation analysis: report both direct and indirect effects.
            print("\n********************** DIRECT AND INDIRECT EFFECTS **********************\n")
            print(self.direct_model)
            print(self.indirect_model)
        else:
            # Moderation-only model: only conditional (direct) effects.
            print("\n********************** CONDITIONAL EFFECTS **********************\n")
            print(self.direct_model)
def test_show_null_counts(self):
    """df.info() shows per-column non-null counts only while the frame fits
    within display.max_info_rows / display.max_info_columns."""
    df = DataFrame(1, columns=range(10), index=range(10))
    df.iloc[1, 1] = np.nan

    def check(null_counts, result):
        # NOTE(review): ``null_counts`` was renamed ``show_counts`` and the
        # old keyword removed in pandas 2.0 -- verify target pandas version.
        buf = StringIO()
        df.info(buf=buf, null_counts=null_counts)
        self.assertTrue(('non-null' in buf.getvalue()) is result)

    # Frame fits within the info limits: counts shown unless disabled.
    with option_context('display.max_info_rows', 20,
                        'display.max_info_columns', 20):
        check(None, True)
        check(True, True)
        check(False, False)
    # Frame exceeds the limits: counts suppressed in every case.
    with option_context('display.max_info_rows', 5,
                        'display.max_info_columns', 5):
        check(None, False)
        check(True, False)
        check(False, False)
def test_repr_truncation(self):
    """Cell text longer than display.max_colwidth is truncated with '...'."""
    max_len = 20
    with option_context("display.max_colwidth", max_len):
        # Random strings straddling the limit: lengths max_len-1..max_len.
        df = DataFrame({'A': np.random.randn(10),
                        'B': [tm.rands(np.random.randint(
                            max_len - 1, max_len + 1)) for i in range(10)
                        ]})
        r = repr(df)
        r = r[r.find('\n') + 1:]  # drop the header row
        adj = fmt._get_adjustment()  # width adjuster (handles wide chars)
        for line, value in lzip(r.split('\n'), df['B']):
            if adj.len(value) + 1 > max_len:
                self.assertIn('...', line)
            else:
                self.assertNotIn('...', line)
    # A limit larger than any value: nothing is truncated.
    with option_context("display.max_colwidth", 999999):
        self.assertNotIn('...', repr(df))
    with option_context("display.max_colwidth", max_len + 2):
        self.assertNotIn('...', repr(df))
def test_expand_frame_repr(self):
    """display.expand_frame_repr controls wrapping of wide frames across
    multiple line-blocks instead of truncating them horizontally."""
    df_small = DataFrame('hello', [0], [0])
    df_wide = DataFrame('hello', [0], lrange(10))
    df_tall = DataFrame('hello', lrange(30), lrange(5))
    with option_context('mode.sim_interactive', True):
        with option_context('display.max_columns', 10, 'display.width', 20,
                            'display.max_rows', 20,
                            'display.show_dimensions', True):
            with option_context('display.expand_frame_repr', True):
                # Wide frame wraps (expanded) rather than truncating.
                self.assertFalse(has_truncated_repr(df_small))
                self.assertFalse(has_expanded_repr(df_small))
                self.assertFalse(has_truncated_repr(df_wide))
                self.assertTrue(has_expanded_repr(df_wide))
                # Tall frames are always vertically truncated.
                self.assertTrue(has_vertically_truncated_repr(df_tall))
                self.assertTrue(has_expanded_repr(df_tall))
            with option_context('display.expand_frame_repr', False):
                # Wrapping disabled: nothing expands.
                self.assertFalse(has_truncated_repr(df_small))
                self.assertFalse(has_expanded_repr(df_small))
                self.assertFalse(has_horizontally_truncated_repr(df_wide))
                self.assertFalse(has_expanded_repr(df_wide))
                self.assertTrue(has_vertically_truncated_repr(df_tall))
                self.assertFalse(has_expanded_repr(df_tall))
def test_str_max_colwidth(self):
    """max_colwidth truncates long cell text in str(df) with '...'."""
    # GH 7856
    df = pd.DataFrame([{'a': 'foo',
                        'b': 'bar',
                        'c': 'uncomfortably long line with lots of stuff',
                        'd': 1}, {'a': 'foo',
                                  'b': 'bar',
                                  'c': 'stuff',
                                  'd': 1}])
    # NOTE(review): the result of set_index is discarded (it is not an
    # in-place call), so the assertions below run against the original
    # RangeIndex -- confirm the call is intentional.
    df.set_index(['a', 'b', 'c'])
    # NOTE(review): the expected strings look whitespace-collapsed (repr
    # pads columns with runs of spaces) -- likely mangled in transit;
    # verify against real output before relying on them.
    self.assertTrue(
        str(df) ==
        ' a b c d\n'
        '0 foo bar uncomfortably long line with lots of stuff 1\n'
        '1 foo bar stuff 1')
    with option_context('max_colwidth', 20):
        self.assertTrue(str(df) == ' a b c d\n'
                        '0 foo bar uncomfortably lo... 1\n'
                        '1 foo bar stuff 1')
def test_wide_repr(self):
    """Wide frames: the expanded (wrapped) repr differs from the truncated
    one, and a larger display.width yields a shorter wrapped repr."""
    with option_context('mode.sim_interactive', True,
                        'display.show_dimensions', True):
        max_cols = get_option('display.max_columns')
        df = DataFrame(tm.rands_array(25, size=(10, max_cols - 1)))
        set_option('display.expand_frame_repr', False)
        rep_str = repr(df)
        # show_dimensions is on, so the shape footer must be present.
        assert "10 rows x %d columns" % (max_cols - 1) in rep_str
        set_option('display.expand_frame_repr', True)
        wide_repr = repr(df)
        self.assertNotEqual(rep_str, wide_repr)
        with option_context('display.width', 120):
            # Wider terminal -> fewer wrapped chunks -> shorter repr.
            wider_repr = repr(df)
            self.assertTrue(len(wider_repr) < len(wide_repr))
    reset_option('display.expand_frame_repr')
def test_wide_repr_named(self):
    """With a named index, every wrapped chunk of the wide repr repeats
    the index name."""
    with option_context('mode.sim_interactive', True):
        max_cols = get_option('display.max_columns')
        df = DataFrame(tm.rands_array(25, size=(10, max_cols - 1)))
        df.index.name = 'DataFrame Index'
        set_option('display.expand_frame_repr', False)
        rep_str = repr(df)
        set_option('display.expand_frame_repr', True)
        wide_repr = repr(df)
        self.assertNotEqual(rep_str, wide_repr)
        with option_context('display.width', 150):
            wider_repr = repr(df)
            self.assertTrue(len(wider_repr) < len(wide_repr))
        # Chunks are 13 lines apart here; the first line of each chunk
        # should carry the index name.
        for line in wide_repr.splitlines()[1::13]:
            self.assertIn('DataFrame Index', line)
    reset_option('display.expand_frame_repr')
def test_wide_repr_multiindex_cols(self):
    """Wide repr with MultiIndex rows AND MultiIndex columns still wraps."""
    with option_context('mode.sim_interactive', True):
        max_cols = get_option('display.max_columns')
        midx = MultiIndex.from_arrays(tm.rands_array(5, size=(2, 10)))
        mcols = MultiIndex.from_arrays(tm.rands_array(3, size=(2, max_cols
                                                               - 1)))
        df = DataFrame(tm.rands_array(25, (10, max_cols - 1)),
                       index=midx, columns=mcols)
        df.index.names = ['Level 0', 'Level 1']
        set_option('display.expand_frame_repr', False)
        rep_str = repr(df)
        set_option('display.expand_frame_repr', True)
        wide_repr = repr(df)
        self.assertNotEqual(rep_str, wide_repr)
        with option_context('display.width', 150):
            # Wider terminal -> shorter wrapped repr.
            wider_repr = repr(df)
            self.assertTrue(len(wider_repr) < len(wide_repr))
    reset_option('display.expand_frame_repr')
def test_show_dimensions(self):
    """display.show_dimensions: True always prints the shape footer, False
    never does, and 'truncate' only when the repr is actually truncated."""
    df = DataFrame(123, lrange(10, 15), lrange(30))  # 5 rows x 30 columns
    # NOTE(review): passing 'info' as the expand_frame_repr value mirrors
    # the upstream pandas test; the option is normally boolean -- verify.
    with option_context('display.max_rows', 10, 'display.max_columns', 40,
                        'display.width', 500, 'display.expand_frame_repr',
                        'info', 'display.show_dimensions', True):
        self.assertTrue('5 rows' in str(df))
        self.assertTrue('5 rows' in df._repr_html_())
    with option_context('display.max_rows', 10, 'display.max_columns', 40,
                        'display.width', 500, 'display.expand_frame_repr',
                        'info', 'display.show_dimensions', False):
        self.assertFalse('5 rows' in str(df))
        self.assertFalse('5 rows' in df._repr_html_())
    # 'truncate' + tiny limits: repr is truncated, footer shown.
    with option_context('display.max_rows', 2, 'display.max_columns', 2,
                        'display.width', 500, 'display.expand_frame_repr',
                        'info', 'display.show_dimensions', 'truncate'):
        self.assertTrue('5 rows' in str(df))
        self.assertTrue('5 rows' in df._repr_html_())
    # 'truncate' + generous limits: nothing truncated, footer omitted.
    with option_context('display.max_rows', 10, 'display.max_columns', 40,
                        'display.width', 500, 'display.expand_frame_repr',
                        'info', 'display.show_dimensions', 'truncate'):
        self.assertFalse('5 rows' in str(df))
        self.assertFalse('5 rows' in df._repr_html_())
def test_info_repr(self):
    """display.large_repr='info' switches truncated frame reprs to the
    df.info()-style summary, for both too-long and too-wide frames."""
    max_rows = get_option('display.max_rows')
    max_cols = get_option('display.max_columns')
    # Long: one row past the vertical limit.
    h, w = max_rows + 1, max_cols - 1
    df = DataFrame(dict((k, np.arange(1, 1 + h)) for k in np.arange(w)))
    assert has_vertically_truncated_repr(df)
    with option_context('display.large_repr', 'info'):
        assert has_info_repr(df)
    # Wide: one column past the horizontal limit.
    h, w = max_rows - 1, max_cols + 1
    df = DataFrame(dict((k, np.arange(1, 1 + h)) for k in np.arange(w)))
    assert has_horizontally_truncated_repr(df)
    with option_context('display.large_repr', 'info'):
        assert has_info_repr(df)
def test_info_repr_html(self):
    """Same as test_info_repr but for the HTML repr: large_repr='info'
    makes _repr_html_ emit the '<class ...' info block."""
    max_rows = get_option('display.max_rows')
    max_cols = get_option('display.max_columns')
    # Long: one row past the vertical limit.
    h, w = max_rows + 1, max_cols - 1
    df = DataFrame(dict((k, np.arange(1, 1 + h)) for k in np.arange(w)))
    assert r'<class' not in df._repr_html_()
    with option_context('display.large_repr', 'info'):
        assert r'<class' in df._repr_html_()
    # Wide: one column past the horizontal limit.
    h, w = max_rows - 1, max_cols + 1
    df = DataFrame(dict((k, np.arange(1, 1 + h)) for k in np.arange(w)))
    assert '<class' not in df._repr_html_()
    with option_context('display.large_repr', 'info'):
        assert '<class' in df._repr_html_()
def test_format_explicit(self):
    """Exact truncated reprs of the generated test series at max_rows=4."""
    test_sers = self.gen_test_series()
    # NOTE(review): expected strings may have had their column padding
    # collapsed in transit -- verify spacing against real output.
    with option_context("display.max_rows", 4):
        res = repr(test_sers['onel'])
        exp = '0 a\n1 a\n ..\n98 a\n99 a\ndtype: object'
        self.assertEqual(exp, res)
        res = repr(test_sers['twol'])
        exp = ('0 ab\n1 ab\n ..\n98 ab\n99 ab\ndtype:'
               ' object')
        self.assertEqual(exp, res)
        # Ascending value widths -> wider ellipsis row.
        res = repr(test_sers['asc'])
        exp = ('0 a\n1 ab\n ... \n4 abcde\n5'
               ' abcdef\ndtype: object')
        self.assertEqual(exp, res)
        res = repr(test_sers['desc'])
        exp = ('5 abcdef\n4 abcde\n ... \n1 ab\n0'
               ' a\ndtype: object')
        self.assertEqual(exp, res)
def df_to_html(df, border=0, classes=('table', 'table-striped', 'table-hover'),
               **kwargs):
    """Convert a dataframe to HTML without truncating contents.

    pandas will truncate cell contents that exceed 50 characters by default.
    Use this function to avoid this truncation behavior.

    This function uses different default parameters than `DataFrame.to_html` to
    give uniform styling to HTML tables that are compatible with q2template
    themes. These parameters can be overridden, and they (along with any other
    parameters) will be passed through to `DataFrame.to_html`.

    Parameters
    ----------
    df : pd.DataFrame
        DataFrame to convert to HTML.
    kwargs : dict
        Parameters passed through to `pd.DataFrame.to_html`.

    Returns
    -------
    str
        DataFrame converted to HTML.

    References
    ----------
    .. [1] https://stackoverflow.com/q/26277757/3776794
    .. [2] https://github.com/pandas-dev/pandas/issues/1852
    """
    # BUG FIX: the old "no limit" sentinel ``-1`` for display.max_colwidth
    # was deprecated in pandas 1.0 and rejected by pandas 2.x; ``None`` is
    # the supported way to disable cell truncation.
    with pd.option_context('display.max_colwidth', None):
        return df.to_html(border=border, classes=classes, **kwargs)
def test_representation(self):
    """DatetimeIndex reprs (empty, naive, tz-aware, with NaT) are identical
    via __repr__, __unicode__ and __str__."""
    idx = []
    idx.append(DatetimeIndex([], freq='D'))
    idx.append(DatetimeIndex(['2011-01-01'], freq='D'))
    idx.append(DatetimeIndex(['2011-01-01', '2011-01-02'], freq='D'))
    idx.append(DatetimeIndex(
        ['2011-01-01', '2011-01-02', '2011-01-03'], freq='D'))
    idx.append(DatetimeIndex(
        ['2011-01-01 09:00', '2011-01-01 10:00', '2011-01-01 11:00'
         ], freq='H', tz='Asia/Tokyo'))
    idx.append(DatetimeIndex(
        ['2011-01-01 09:00', '2011-01-01 10:00', pd.NaT], tz='US/Eastern'))
    idx.append(DatetimeIndex(
        ['2011-01-01 09:00', '2011-01-01 10:00', pd.NaT], tz='UTC'))
    exp = []
    exp.append("""DatetimeIndex([], dtype='datetime64[ns]', freq='D')""")
    exp.append("DatetimeIndex(['2011-01-01'], dtype='datetime64[ns]', "
               "freq='D')")
    exp.append("DatetimeIndex(['2011-01-01', '2011-01-02'], "
               "dtype='datetime64[ns]', freq='D')")
    exp.append("DatetimeIndex(['2011-01-01', '2011-01-02', '2011-01-03'], "
               "dtype='datetime64[ns]', freq='D')")
    exp.append("DatetimeIndex(['2011-01-01 09:00:00+09:00', "
               "'2011-01-01 10:00:00+09:00', '2011-01-01 11:00:00+09:00']"
               ", dtype='datetime64[ns, Asia/Tokyo]', freq='H')")
    exp.append("DatetimeIndex(['2011-01-01 09:00:00-05:00', "
               "'2011-01-01 10:00:00-05:00', 'NaT'], "
               "dtype='datetime64[ns, US/Eastern]', freq=None)")
    # NOTE(review): the trailing '""' below concatenates an empty string
    # literal -- harmless, but likely a copy/paste artifact.
    exp.append("DatetimeIndex(['2011-01-01 09:00:00+00:00', "
               "'2011-01-01 10:00:00+00:00', 'NaT'], "
               "dtype='datetime64[ns, UTC]', freq=None)""")
    # Wide display so no repr wraps onto multiple lines.
    with pd.option_context('display.width', 300):
        for indx, expected in zip(idx, exp):
            for func in ['__repr__', '__unicode__', '__str__']:
                result = getattr(indx, func)()
                self.assertEqual(result, expected)
def test_representation(self):
    """TimedeltaIndex reprs (empty, regular, irregular) via __repr__,
    __unicode__ and __str__.

    NOTE(review): this redefines ``test_representation`` -- if both
    versions live in the same class this one shadows the previous
    definition; upstream they belong to different test classes.
    """
    idx1 = TimedeltaIndex([], freq='D')
    idx2 = TimedeltaIndex(['1 days'], freq='D')
    idx3 = TimedeltaIndex(['1 days', '2 days'], freq='D')
    idx4 = TimedeltaIndex(['1 days', '2 days', '3 days'], freq='D')
    idx5 = TimedeltaIndex(['1 days 00:00:01', '2 days', '3 days'])
    exp1 = """TimedeltaIndex([], dtype='timedelta64[ns]', freq='D')"""
    exp2 = ("TimedeltaIndex(['1 days'], dtype='timedelta64[ns]', "
            "freq='D')")
    exp3 = ("TimedeltaIndex(['1 days', '2 days'], "
            "dtype='timedelta64[ns]', freq='D')")
    exp4 = ("TimedeltaIndex(['1 days', '2 days', '3 days'], "
            "dtype='timedelta64[ns]', freq='D')")
    # Sub-day component forces full HH:MM:SS formatting and freq=None.
    exp5 = ("TimedeltaIndex(['1 days 00:00:01', '2 days 00:00:00', "
            "'3 days 00:00:00'], dtype='timedelta64[ns]', freq=None)")
    with pd.option_context('display.width', 300):
        for idx, expected in zip([idx1, idx2, idx3, idx4, idx5],
                                 [exp1, exp2, exp3, exp4, exp5]):
            for func in ['__repr__', '__unicode__', '__str__']:
                result = getattr(idx, func)()
                self.assertEqual(result, expected)
def test_representation_to_series(self):
    """repr of a Series wrapping a TimedeltaIndex matches the expected
    multi-line layout for empty/regular/irregular indexes."""
    idx1 = TimedeltaIndex([], freq='D')
    idx2 = TimedeltaIndex(['1 days'], freq='D')
    idx3 = TimedeltaIndex(['1 days', '2 days'], freq='D')
    idx4 = TimedeltaIndex(['1 days', '2 days', '3 days'], freq='D')
    idx5 = TimedeltaIndex(['1 days 00:00:01', '2 days', '3 days'])
    exp1 = """Series([], dtype: timedelta64[ns])"""
    exp2 = """0   1 days
dtype: timedelta64[ns]"""
    exp3 = """0   1 days
1   2 days
dtype: timedelta64[ns]"""
    exp4 = """0   1 days
1   2 days
2   3 days
dtype: timedelta64[ns]"""
    exp5 = """0   1 days 00:00:01
1   2 days 00:00:00
2   3 days 00:00:00
dtype: timedelta64[ns]"""
    # NOTE(review): the padding inside the expected strings above may have
    # been collapsed in transit -- verify against real output.
    with pd.option_context('display.width', 300):
        for idx, expected in zip([idx1, idx2, idx3, idx4, idx5],
                                 [exp1, exp2, exp3, exp4, exp5]):
            result = repr(pd.Series(idx))
            self.assertEqual(result, expected)
def test_repr_max_seq_item_setting(self):
    """display.max_seq_items=None must render every element (no ellipsis)."""
    # GH10182
    idx = self.create_index()
    idx = idx.repeat(50)
    with pd.option_context("display.max_seq_items", None):
        repr(idx)
        self.assertFalse('...' in str(idx))
def test_isnull_for_inf(self):
    """With mode.use_inf_as_null enabled, inf counts as missing for both
    isnull() and dropna().

    NOTE(review): ``mode.use_inf_as_null`` was renamed ``use_inf_as_na``
    and later removed (pandas 2.x) -- verify the target pandas version.
    """
    s = Series(['a', np.inf, np.nan, 1.0])
    with pd.option_context('mode.use_inf_as_null', True):
        r = s.isnull()
        dr = s.dropna()
        e = Series([False, True, True, False])
        de = Series(['a', 1.0], index=[0, 3])
        tm.assert_series_equal(r, e)
        tm.assert_series_equal(dr, de)
def test_repr_chop_threshold(self):
    """Values with absolute magnitude below display.chop_threshold are
    displayed as zero; None (the default) disables chopping."""
    df = DataFrame([[0.1, 0.5], [0.5, -0.1]])
    pd.reset_option("display.chop_threshold")  # default None
    # NOTE(review): expected strings look whitespace-collapsed (repr pads
    # columns) -- verify spacing against real output.
    self.assertEqual(repr(df), ' 0 1\n0 0.1 0.5\n1 0.5 -0.1')
    with option_context("display.chop_threshold", 0.2):
        self.assertEqual(repr(df), ' 0 1\n0 0.0 0.5\n1 0.5 0.0')
    with option_context("display.chop_threshold", 0.6):
        self.assertEqual(repr(df), ' 0 1\n0 0.0 0.0\n1 0.0 0.0')
    with option_context("display.chop_threshold", None):
        self.assertEqual(repr(df), ' 0 1\n0 0.1 0.5\n1 0.5 -0.1')
def test_repr_obeys_max_seq_limit(self):
    """pprint_thing truncates long sequences per display.max_seq_items."""
    with option_context("display.max_seq_items", 2000):
        self.assertTrue(len(com.pprint_thing(lrange(1000))) > 1000)
    with option_context("display.max_seq_items", 5):
        self.assertTrue(len(com.pprint_thing(lrange(1000))) < 100)
def test_repr_non_interactive(self):
    """Non-interactive mode must not depend on terminal auto-size
    detection: a tall frame with generous max_rows is fully rendered.

    NOTE(review): ``display.height`` was removed in later pandas -- verify
    the target version still accepts it.
    """
    # in non interactive mode, there can be no dependency on the
    # result of terminal auto size detection
    df = DataFrame('hello', lrange(1000), lrange(5))
    with option_context('mode.sim_interactive', False, 'display.width', 0,
                        'display.height', 0, 'display.max_rows', 5000):
        self.assertFalse(has_truncated_repr(df))
        self.assertFalse(has_expanded_repr(df))
def test_auto_detect(self):
    """max_rows/max_columns of 0 trigger terminal-size auto-detection,
    None disables the limit entirely (wrap instead of truncate)."""
    term_width, term_height = get_terminal_size()
    fac = 1.05  # Arbitrary large factor to exceed term width
    cols = range(int(term_width * fac))
    index = range(10)
    df = DataFrame(index=index, columns=cols)
    with option_context('mode.sim_interactive', True):
        with option_context('max_rows', None):
            with option_context('max_columns', None):
                # Wrap around with None
                self.assertTrue(has_expanded_repr(df))
        with option_context('max_rows', 0):
            with option_context('max_columns', 0):
                # Truncate with auto detection.
                self.assertTrue(has_horizontally_truncated_repr(df))
        index = range(int(term_height * fac))
        df = DataFrame(index=index, columns=cols)
        with option_context('max_rows', 0):
            with option_context('max_columns', None):
                # Wrap around with None
                self.assertTrue(has_expanded_repr(df))
                # Truncate vertically
                self.assertTrue(has_vertically_truncated_repr(df))
        with option_context('max_rows', None):
            with option_context('max_columns', 0):
                self.assertTrue(has_horizontally_truncated_repr(df))
def test_to_string_truncate_indices(self):
    """Per-axis truncation happens exactly when the axis size exceeds the
    corresponding max_rows/max_columns limit, across index types."""
    for index in [tm.makeStringIndex, tm.makeUnicodeIndex, tm.makeIntIndex,
                  tm.makeDateIndex, tm.makePeriodIndex]:
        for column in [tm.makeStringIndex]:
            for h in [10, 20]:
                for w in [10, 20]:
                    with option_context("display.expand_frame_repr",
                                        False):
                        df = DataFrame(index=index(h), columns=column(w))
                        # Vertical truncation only when h > max_rows.
                        with option_context("display.max_rows", 15):
                            if h == 20:
                                self.assertTrue(
                                    has_vertically_truncated_repr(df))
                            else:
                                self.assertFalse(
                                    has_vertically_truncated_repr(df))
                        # Horizontal truncation only when w > max_columns.
                        with option_context("display.max_columns", 15):
                            if w == 20:
                                self.assertTrue(
                                    has_horizontally_truncated_repr(df))
                            else:
                                self.assertFalse(
                                    has_horizontally_truncated_repr(df))
                        # Doubly truncated only if both limits exceeded.
                        with option_context("display.max_rows", 15,
                                            "display.max_columns", 15):
                            if h == 20 and w == 20:
                                self.assertTrue(has_doubly_truncated_repr(
                                    df))
                            else:
                                self.assertFalse(has_doubly_truncated_repr(
                                    df))
def test_to_string_truncate_multilevel(self):
    """A frame with hierarchical rows AND columns that exceeds both limits
    must be truncated in both directions."""
    level_values = [['bar', 'bar', 'baz', 'baz', 'foo', 'foo', 'qux', 'qux'],
                    ['one', 'two', 'one', 'two', 'one', 'two', 'one', 'two']]
    frame = DataFrame(index=level_values, columns=level_values)
    with option_context("display.max_rows", 7, "display.max_columns", 7):
        self.assertTrue(has_doubly_truncated_repr(frame))
def test_wide_repr_wide_columns(self):
    """Three 90-character column names force a wrapped repr of a known
    total height.

    NOTE(review): the expected line count (20) depends on the default
    display.width -- confirm it holds for the pinned pandas version.
    """
    with option_context('mode.sim_interactive', True):
        df = DataFrame(randn(5, 3), columns=['a' * 90, 'b' * 90, 'c' * 90])
        rep_str = repr(df)
        self.assertEqual(len(rep_str.splitlines()), 20)
def test_wide_repr_unicode(self):
    """Same as test_wide_repr but with (unicode) random string data:
    expanded repr differs and shrinks when display.width grows."""
    with option_context('mode.sim_interactive', True):
        max_cols = get_option('display.max_columns')
        df = DataFrame(tm.rands_array(25, size=(10, max_cols - 1)))
        set_option('display.expand_frame_repr', False)
        rep_str = repr(df)
        set_option('display.expand_frame_repr', True)
        wide_repr = repr(df)
        self.assertNotEqual(rep_str, wide_repr)
        with option_context('display.width', 150):
            # Wider terminal -> shorter wrapped repr.
            wider_repr = repr(df)
            self.assertTrue(len(wider_repr) < len(wide_repr))
    reset_option('display.expand_frame_repr')
def test_wide_repr_wide_long_columns(self):
with option_context('mode.sim_interactive', True):
df = DataFrame({'a': ['a' * 30, 'b' * 30],
'b': ['c' * 70, 'd' * 80]})
result = repr(df)
self.assertTrue('ccccc' in result)
self.assertTrue('ddddd' in result)
def test_max_multi_index_display(self):
    """Line counts of str(series) under various max_rows settings, for a
    MultiIndex series and a plain RangeIndex series.

    NOTE(review): exact counts depend on the pandas version's truncation
    layout (header/footer lines) -- verify against the pinned version.
    """
    # GH 7101
    # doc example (indexing.rst)
    # multi-index
    arrays = [['bar', 'bar', 'baz', 'baz', 'foo', 'foo', 'qux', 'qux'],
              ['one', 'two', 'one', 'two', 'one', 'two', 'one', 'two']]
    tuples = list(zip(*arrays))
    index = MultiIndex.from_tuples(tuples, names=['first', 'second'])
    s = Series(randn(8), index=index)
    with option_context("display.max_rows", 10):
        self.assertEqual(len(str(s).split('\n')), 10)
    with option_context("display.max_rows", 3):
        self.assertEqual(len(str(s).split('\n')), 5)
    with option_context("display.max_rows", 2):
        self.assertEqual(len(str(s).split('\n')), 5)
    with option_context("display.max_rows", 1):
        self.assertEqual(len(str(s).split('\n')), 4)
    with option_context("display.max_rows", 0):
        # 0 -> auto-detect: full 8 rows fit.
        self.assertEqual(len(str(s).split('\n')), 10)
    # index
    s = Series(randn(8), None)
    with option_context("display.max_rows", 10):
        self.assertEqual(len(str(s).split('\n')), 9)
    with option_context("display.max_rows", 3):
        self.assertEqual(len(str(s).split('\n')), 4)
    with option_context("display.max_rows", 2):
        self.assertEqual(len(str(s).split('\n')), 4)
    with option_context("display.max_rows", 1):
        self.assertEqual(len(str(s).split('\n')), 3)
    with option_context("display.max_rows", 0):
        self.assertEqual(len(str(s).split('\n')), 9)
# Make sure #8532 is fixed
def test_consistent_format(self):
    """All displayed values of a truncated float series share one decimal
    format (0.9999 forces 4 decimal places everywhere)."""
    s = pd.Series([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0.9999, 1, 1] * 10)
    # NOTE(review): the expected string's column padding looks collapsed
    # in transit -- verify spacing against real output.
    with option_context("display.max_rows", 10):
        res = repr(s)
        exp = ('0 1.0000\n1 1.0000\n2 1.0000\n3 '
               '1.0000\n4 1.0000\n ... \n125 '
               '1.0000\n126 1.0000\n127 0.9999\n128 '
               '1.0000\n129 1.0000\ndtype: float64')
        self.assertEqual(res, exp)
def chck_ncols(self, s):
    """Assert every value line of the truncated repr has the same width."""
    with option_context("display.max_rows", 10):
        # Keep only lines without a run of dots (drops the '..' ellipsis
        # row), then discard the trailing length/dtype line.
        body = [ln for ln in repr(s).split('\n')
                if not re.match('[^\.]*\.+', ln)][:-1]
        distinct_widths = len(set(len(ln.strip()) for ln in body))
        self.assertEqual(distinct_widths, 1)
def test_truncate_ndots(self):
    """The ellipsis row's dot count tracks the value column width (2 dots
    for 1-char values, 3 dots for 3-char values)."""
    def getndots(s):
        # Length of the first consecutive dot-run in the flattened repr.
        return len(re.match('[^\.]*(\.*)', s).groups()[0])

    s = Series([0, 2, 3, 6])
    with option_context("display.max_rows", 2):
        strrepr = repr(s).replace('\n', '')
        self.assertEqual(getndots(strrepr), 2)
    s = Series([0, 100, 200, 400])
    with option_context("display.max_rows", 2):
        strrepr = repr(s).replace('\n', '')
        self.assertEqual(getndots(strrepr), 3)
def test_output_significant_digits(self):
    """Slices of a column of tiny floats switch between scientific and
    fixed notation depending on the value range in the slice."""
    # Issue #9764
    # In case default display precision changes:
    with pd.option_context('display.precision', 6):
        # DataFrame example from issue #9764
        d = pd.DataFrame(
            {'col1': [9.999e-8, 1e-7, 1.0001e-7, 2e-7, 4.999e-7, 5e-7,
                      5.0001e-7, 6e-7, 9.999e-7, 1e-6, 1.0001e-6, 2e-6,
                      4.999e-6, 5e-6, 5.0001e-6, 6e-6]})
        # Keys are (start, stop) row slices; values the expected str().
        # NOTE(review): internal padding may be collapsed in transit.
        expected_output = {
            (0, 6):
            ' col1\n0 9.999000e-08\n1 1.000000e-07\n2 1.000100e-07\n3 2.000000e-07\n4 4.999000e-07\n5 5.000000e-07',
            (1, 6):
            ' col1\n1 1.000000e-07\n2 1.000100e-07\n3 2.000000e-07\n4 4.999000e-07\n5 5.000000e-07',
            (1, 8):
            ' col1\n1 1.000000e-07\n2 1.000100e-07\n3 2.000000e-07\n4 4.999000e-07\n5 5.000000e-07\n6 5.000100e-07\n7 6.000000e-07',
            (8, 16):
            ' col1\n8 9.999000e-07\n9 1.000000e-06\n10 1.000100e-06\n11 2.000000e-06\n12 4.999000e-06\n13 5.000000e-06\n14 5.000100e-06\n15 6.000000e-06',
            (9, 16):
            ' col1\n9 0.000001\n10 0.000001\n11 0.000002\n12 0.000005\n13 0.000005\n14 0.000005\n15 0.000006'
        }
        for (start, stop), v in expected_output.items():
            self.assertEqual(str(d[start:stop]), v)
def test_too_long(self):
    """A column mixing a large magnitude with a high-precision value falls
    back to scientific notation for the whole column."""
    # GH 10451
    with pd.option_context('display.precision', 4):
        # need both a number > 1e6 and something that normally formats to
        # having length > display.precision + 6
        df = pd.DataFrame(dict(x=[12345.6789]))
        self.assertEqual(str(df), ' x\n0 12345.6789')
        df = pd.DataFrame(dict(x=[2e6]))
        self.assertEqual(str(df), ' x\n0 2000000.0')
        # Both together: scientific notation kicks in.
        df = pd.DataFrame(dict(x=[12345.6789, 2e6]))
        self.assertEqual(
            str(df), ' x\n0 1.2346e+04\n1 2.0000e+06')
def test_precision(self):
    """Styler precision: defaults from display.precision, overridable via
    the constructor, and set_precision mutates and returns self."""
    with pd.option_context('display.precision', 10):
        s = Styler(self.df)
    self.assertEqual(s.precision, 10)
    s = Styler(self.df, precision=2)
    self.assertEqual(s.precision, 2)
    s2 = s.set_precision(4)
    # set_precision is fluent: it returns the same Styler instance.
    self.assertTrue(s is s2)
    self.assertEqual(s.precision, 4)
def _main(log_path, show_browser=False):
    """Render a bot-run log file as a styled HTML report.

    Parses the log, splits entries by level (INFO/WARNING/ERROR), builds
    linked/escaped HTML tables for each, renders them through
    ``template.html`` and writes ``<log stem>.html`` next to the log.

    :param log_path: path to the log file to process
    :param show_browser: open the generated report in a web browser
    :return: None
    """
    print(log_path)
    df, metadata = process_log(log_path)
    del df['Timestamp']
    df['Msg Type'] = df['Msg Type'].apply(escape_html_chars)
    df['Message'] = df['Message'].apply(escape_html_chars)
    # df['Message'] = df['Message'].apply(try_json)
    df['Message'] = df.apply(lambda row: format_error(row['Msg Type'], row['Message']), 1)
    # Link revision IDs to the corresponding Wikidata diff page.
    df['Rev ID'] = df['Rev ID'].apply(lambda x: '<a href="https://www.wikidata.org/w/index.php?oldid={}&diff=prev">{}</a>'.format(x,x) if x else x)
    level_counts, info_counts, warning_counts, error_counts = generate_summary(df)
    # NOTE(review): .is_copy = False silences SettingWithCopyWarning on
    # these query() slices; the attribute was removed in newer pandas.
    warnings_df = df.query("Level == 'WARNING'")
    warnings_df.is_copy = False
    del warnings_df['Level']
    if not warnings_df.empty:
        warnings_df = gen_ext_id_links(warnings_df)
        warnings_df = url_qid(warnings_df, "QID")
    errors_df = df.query("Level == 'ERROR'")
    errors_df.is_copy = False
    del errors_df['Level']
    if not errors_df.empty:
        errors_df = gen_ext_id_links(errors_df)
        errors_df = url_qid(errors_df, "QID")
        # errors_df['Message'] = errors_df['Message'].apply(try_format_error)
    info_df = df.query("Level == 'INFO'")
    info_df.is_copy = False
    del info_df['Level']
    if not info_df.empty:
        info_df = gen_ext_id_links(info_df)
        info_df = url_qid(info_df, "QID")
        info_df.Message = info_df.Message.str.replace("SKIP", "No Action")
    # NOTE(review): max_colwidth=-1 (no truncation) was deprecated in
    # pandas 1.0 and rejected by 2.x; newer pandas expects None.
    with pd.option_context('display.max_colwidth', -1):
        # this class nonsense is an ugly hack: https://stackoverflow.com/questions/15079118/js-datatables-from-pandas/41536906
        level_counts = level_counts.to_frame().to_html(escape=False)
        info_counts = info_counts.to_frame().to_html(escape=False)
        warning_counts = warning_counts.to_frame().to_html(escape=False)
        error_counts = error_counts.to_frame().to_html(escape=False)
        info_df = info_df.to_html(escape=False, classes='df" id = "info_df')
        warnings_df = warnings_df.to_html(escape=False, classes='df" id = "warning_df')
        errors_df = errors_df.to_html(escape=False, classes='df" id = "error_df')
    template = Template(open(os.path.join(sys.path[0], "template.html")).read())
    s = template.render(name=metadata['name'], run_id=metadata['run_id'],
                        level_counts=level_counts,
                        info_counts=info_counts,
                        warning_counts=warning_counts,
                        error_counts=error_counts,
                        warnings_df=warnings_df, errors_df=errors_df, info_df=info_df)
    out_path = log_path.rsplit(".", 1)[0] + ".html"
    with open(out_path, 'w') as f:
        f.write(s)
    if show_browser:
        webbrowser.open(out_path)
def main():
    """Print the last row of, and optionally plot, selected fields from
    one or more HDF5 training-log files.

    Files given as ``host:path`` are rsync'd to /tmp first. --noplot
    suppresses interactive display; --plotfile saves the figure instead.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('logfiles', type=str, nargs='+')
    parser.add_argument('--fields', type=str, default='ret,avglen,ent,kl,vf_r2,ttotal')
    parser.add_argument('--noplot', action='store_true')
    parser.add_argument('--plotfile', type=str, default=None)
    parser.add_argument('--range_end', type=int, default=None)
    args = parser.parse_args()
    assert len(set(args.logfiles)) == len(args.logfiles), 'Log files must be unique'
    fields = args.fields.split(',')
    # Load logs from all files
    fname2log = {}
    for fname in args.logfiles:
        if ':' in fname:
            # Remote path (host:path): copy locally before opening.
            os.system('rsync -avrz {} /tmp'.format(fname))
            fname = os.path.join('/tmp', os.path.basename(fname))
        with pd.HDFStore(fname, 'r') as f:
            assert fname not in fname2log
            df = f['log']
            df.set_index('iter', inplace=True)
            fname2log[fname] = df.loc[:args.range_end, fields]
    # Print
    if not args.noplot or args.plotfile is not None:
        import matplotlib
        if args.plotfile is not None:
            matplotlib.use('Agg')  # headless backend when saving to file
        import matplotlib.pyplot as plt
        plt.style.use('seaborn-colorblind')
        ax = None
        for fname, df in fname2log.items():
            with pd.option_context('display.max_rows', 9999):
                print(fname)
                print(df[-1:])
            if 'vf_r2' in df.keys():
                # Clamp explained variance at zero for readability.
                df['vf_r2'] = np.maximum(0, df['vf_r2'])
            if not args.noplot:
                if ax is None:
                    ax = df.plot(subplots=True, title=','.join(args.logfiles))
                else:
                    df.plot(subplots=True, title=','.join(args.logfiles), ax=ax, legend=False)
        if args.plotfile is not None:
            plt.savefig(args.plotfile, transparent=True, bbox_inches='tight', dpi=300)
        elif not args.noplot:
            plt.show()
def transform(self, X, y=None):
    """Encode the Titanic string features of ``X`` as integers.

    Maps ``Embarked`` (S/C/Q -> 0/1/2) and ``Sex`` (female/male -> 0/1),
    derives an aggregated integer ``Title`` code from ``Name``, and drops
    the raw ``Name`` column.

    Parameters
    ----------
    X : pd.DataFrame
        Frame with 'Embarked', 'Sex' and 'Name' columns; mutated via .loc.
    y : ignored
        Present for scikit-learn transformer API compatibility.

    Returns
    -------
    pd.DataFrame
        ``X`` with encoded columns and without 'Name'.
    """
    # Suppress SettingWithCopyWarning (alternatively: add a X = X.copy()
    with pd.option_context('mode.chained_assignment', None):
        # --- Convert Embarked
        mapping = {'S': 0,
                   'C': 1,
                   'Q': 2,
                   }
        X.loc[:, 'Embarked'] = X.loc[:, 'Embarked'].replace(mapping, inplace=False)
        # --- Convert Sex
        mapping = {'female': 0,
                   'male': 1
                   }
        X.loc[:, 'Sex'] = X['Sex'].replace(mapping, inplace=False)
        # --- Convert Name to Title ("Last, Title. First" -> "Title")
        X.loc[:, 'Title'] = X['Name'].map(lambda name: name.split(',')[1].split('.')[0].strip())
        # a map of more aggregated titles
        mapping = {
            "Capt": 0,          # Officer
            "Col": 0,           # Officer
            "Major": 0,         # Officer
            "Jonkheer": 1,      # Royalty
            "Don": 1,           # Royalty
            "Sir": 1,           # Royalty
            "Dr": 0,            # Officer
            "Rev": 0,           # Officer
            "the Countess": 1,  # Royalty
            "Dona": 1,          # Royalty
            "Mme": 2,           # "Mrs"
            "Mlle": 3,          # "Miss"
            "Ms": 2,            # "Mrs"
            "Mr": 4,            # "Mr"
            "Mrs": 2,           # "Mrs"
            "Miss": 3,          # "Miss"
            "Master": 5,        # "Master"
            "Lady": 1           # "Royalty"
        }
        X.loc[:, 'Title'] = X['Title'].map(mapping)
        # BUG FIX: the positional ``axis`` argument to DataFrame.drop
        # (``X.drop('Name', 1)``) was deprecated and removed in pandas 2.0;
        # use the explicit keyword form instead.
        X = X.drop(columns='Name')
    return X
def test_representation_to_series(self):
    """repr of a Series wrapping a DatetimeIndex: empty, naive, tz-aware
    and NaT-containing cases all match the expected multi-line layout."""
    idx1 = DatetimeIndex([], freq='D')
    idx2 = DatetimeIndex(['2011-01-01'], freq='D')
    idx3 = DatetimeIndex(['2011-01-01', '2011-01-02'], freq='D')
    idx4 = DatetimeIndex(
        ['2011-01-01', '2011-01-02', '2011-01-03'], freq='D')
    idx5 = DatetimeIndex(['2011-01-01 09:00', '2011-01-01 10:00',
                          '2011-01-01 11:00'], freq='H', tz='Asia/Tokyo')
    idx6 = DatetimeIndex(['2011-01-01 09:00', '2011-01-01 10:00', pd.NaT],
                         tz='US/Eastern')
    idx7 = DatetimeIndex(['2011-01-01 09:00', '2011-01-02 10:15'])
    exp1 = """Series([], dtype: datetime64[ns])"""
    exp2 = """0   2011-01-01
dtype: datetime64[ns]"""
    exp3 = """0   2011-01-01
1   2011-01-02
dtype: datetime64[ns]"""
    exp4 = """0   2011-01-01
1   2011-01-02
2   2011-01-03
dtype: datetime64[ns]"""
    exp5 = """0   2011-01-01 09:00:00+09:00
1   2011-01-01 10:00:00+09:00
2   2011-01-01 11:00:00+09:00
dtype: datetime64[ns, Asia/Tokyo]"""
    exp6 = """0   2011-01-01 09:00:00-05:00
1   2011-01-01 10:00:00-05:00
2                         NaT
dtype: datetime64[ns, US/Eastern]"""
    exp7 = """0   2011-01-01 09:00:00
1   2011-01-02 10:15:00
dtype: datetime64[ns]"""
    # NOTE(review): internal padding of the expected strings may have been
    # collapsed in transit -- verify against real output.
    with pd.option_context('display.width', 300):
        for idx, expected in zip([idx1, idx2, idx3, idx4,
                                  idx5, idx6, idx7],
                                 [exp1, exp2, exp3, exp4,
                                  exp5, exp6, exp7]):
            result = repr(Series(idx))
            self.assertEqual(result, expected)
def test_repr_max_columns_max_rows(self):
    """Interplay of display.width, max_rows and max_columns in deciding
    between truncated and expanded (wrapped) frame reprs."""
    term_width, term_height = get_terminal_size()
    if term_width < 10 or term_height < 10:
        raise nose.SkipTest("terminal size too small, "
                            "{0} x {1}".format(term_width, term_height))

    def mkframe(n):
        # n x n frame of zeros with 5-char zero-padded string labels.
        index = ['%05d' % i for i in range(n)]
        return DataFrame(0, index, index)

    df6 = mkframe(6)
    df10 = mkframe(10)
    with option_context('mode.sim_interactive', True):
        with option_context('display.width', term_width * 2):
            with option_context('display.max_rows', 5,
                                'display.max_columns', 5):
                self.assertFalse(has_expanded_repr(mkframe(4)))
                self.assertFalse(has_expanded_repr(mkframe(5)))
                self.assertFalse(has_expanded_repr(df6))
                self.assertTrue(has_doubly_truncated_repr(df6))
            with option_context('display.max_rows', 20,
                                'display.max_columns', 10):
                # Out of max_columns boundary, but no extending
                # since not exceeding width
                self.assertFalse(has_expanded_repr(df6))
                self.assertFalse(has_truncated_repr(df6))
            with option_context('display.max_rows', 9,
                                'display.max_columns', 10):
                # out of vertical bounds can not result in expanded repr
                self.assertFalse(has_expanded_repr(df10))
                self.assertTrue(has_vertically_truncated_repr(df10))
        # width=None in terminal, auto detection
        with option_context('display.max_columns', 100, 'display.max_rows',
                            term_width * 20, 'display.width', None):
            df = mkframe((term_width // 7) - 2)
            self.assertFalse(has_expanded_repr(df))
            df = mkframe((term_width // 7) + 2)
            com.pprint_thing(df._repr_fits_horizontal_())
            self.assertTrue(has_expanded_repr(df))
def to_clipboard(obj, excel=None, sep=None, **kwargs):  # pragma: no cover
    """
    Attempt to write text representation of object to the system clipboard
    The clipboard can be then pasted into Excel for example.

    Parameters
    ----------
    obj : the object to write to the clipboard
    excel : boolean, defaults to True
            if True, use the provided separator, writing in a csv
            format for allowing easy pasting into excel.
            if False, write a string representation of the object
            to the clipboard
    sep : optional, defaults to tab
    other keywords are passed to to_csv

    Notes
    -----
    Requirements for your platform
      - Linux: xclip, or xsel (with gtk or PyQt4 modules)
      - Windows:
      - OS X:
    """
    from pandas.util.clipboard import clipboard_set
    if excel is None:
        excel = True
    if excel:
        try:
            if sep is None:
                sep = '\t'
            buf = StringIO()
            obj.to_csv(buf, sep=sep, **kwargs)
            clipboard_set(buf.getvalue())
            return
        # BUG FIX: was a bare ``except:`` which also swallowed
        # KeyboardInterrupt/SystemExit; keep the deliberate best-effort
        # fallback but only for ordinary exceptions.
        except Exception:
            pass
    if isinstance(obj, DataFrame):
        # str(df) has various unhelpful defaults, like truncation
        with option_context('display.max_colwidth', 999999):
            objstr = obj.to_string(**kwargs)
    else:
        objstr = str(obj)
    clipboard_set(objstr)
def main():
    """Print the last row of, and plot, selected fields from one or more
    HDF5 training-log files.

    --noplot suppresses the interactive window; --plotfile additionally
    saves the figure to a file.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('logfiles', type=str, nargs='+')
    parser.add_argument('--fields', type=str, default='trueret,avglen,ent,kl,vf_r2,vf_kl,tdvf_r2,rloss,racc')
    parser.add_argument('--noplot', action='store_true')
    parser.add_argument('--plotfile', type=str, default=None)
    parser.add_argument('--range_end', type=int, default=None)
    args = parser.parse_args()
    assert len(set(args.logfiles)) == len(args.logfiles), 'Log files must be unique'
    fields = args.fields.split(',')
    # Load logs from all files
    fname2log = {}
    for fname in args.logfiles:
        with pd.HDFStore(fname, 'r') as f:
            assert fname not in fname2log
            df = f['log']
            df.set_index('iter', inplace=True)
            fname2log[fname] = df.loc[:args.range_end, fields]
    # Print stuff
    if not args.noplot or args.plotfile is not None:
        import matplotlib
        if args.plotfile is not None:
            matplotlib.use('Agg')  # headless backend when saving to file
        import matplotlib.pyplot as plt
        plt.style.use('ggplot')
    ax = None
    for fname, df in fname2log.items():
        with pd.option_context('display.max_rows', 9999):
            # BUG FIX: these were Python 2 print statements (a SyntaxError
            # under Python 3); converted to print() calls.
            print(fname)
            print(df[-1:])
        # Clamp explained variance at zero; guard the column lookup so a
        # --fields value without vf_r2 does not raise (matches the sibling
        # log-plotting script's behavior).
        if 'vf_r2' in df.keys():
            df['vf_r2'] = np.maximum(0, df['vf_r2'])
        if ax is None:
            ax = df.plot(subplots=True, title=fname)
        else:
            df.plot(subplots=True, title=fname, ax=ax, legend=False)
    if not args.noplot:
        plt.show()
    if args.plotfile is not None:
        plt.savefig(args.plotfile, bbox_inches='tight', dpi=200)
def _print_df_scores(df_scores, score_types, indent=''):
    """Pretty print the scores dataframe.

    Re-orders rows to train/valid/test and the columns to the given score
    types, then prints the frame with ANSI colors: bold header, bold
    label + official score per row, paler remaining scores.

    Parameters
    ----------
    df_scores : pd.DataFrame
        the score dataframe
    score_types : list of score types
        a list of score types to use
    indent : str, default=''
        indentation if needed
    """
    try:
        # try to re-order columns/rows in the printed array
        # we may not have all train, valid, test, so need to select
        index_order = np.array(['train', 'valid', 'test'])
        ordered_index = index_order[np.isin(index_order, df_scores.index)]
        df_scores = df_scores.loc[
            ordered_index, [score_type.name for score_type in score_types]]
    except Exception:
        # Best-effort: fall back to the original ordering.
        _print_warning("Couldn't re-order the score matrix..")
    # Wide display so the whole row fits on one line.
    with pd.option_context("display.width", 160):
        df_repr = repr(df_scores)
    df_repr_out = []
    # Pair each printed line with its row label; the two leading Nones
    # cover the header line(s) of the repr.
    for line, color_key in zip(df_repr.splitlines(),
                               [None, None] +
                               list(df_scores.index.values)):
        if line.strip() == 'step':
            continue
        if color_key is None:
            # table header
            line = stylize(line, fg(fg_colors['title']) + attr('bold'))
        if color_key is not None:
            tokens = line.split()
            tokens_bak = tokens[:]
            if 'official_' + color_key in fg_colors:
                # line label and official score bold & bright
                label_color = fg(fg_colors['official_' + color_key])
                tokens[0] = stylize(tokens[0], label_color + attr('bold'))
                tokens[1] = stylize(tokens[1], label_color + attr('bold'))
            if color_key in fg_colors:
                # other scores pale
                tokens[2:] = [stylize(token, fg(fg_colors[color_key]))
                              for token in tokens[2:]]
            # Splice the colorized tokens back into the original line so
            # the column padding is preserved.
            for token_from, token_to in zip(tokens_bak, tokens):
                line = line.replace(token_from, token_to)
        line = indent + line
        df_repr_out.append(line)
    print('\n'.join(df_repr_out))