我们从Python开源项目中,提取了以下42个代码示例,用于说明如何使用 pandas.option_context()。
def summary(self):
    """ Print the summary of the Process model.

    :return: None
    """
    with pd.option_context("precision", self.options["precision"]):
        full_model = self.outcome_models[self.iv]
        m_models = [self.outcome_models.get(med_name)
                    for med_name in self.mediators]
        if self.options["detail"]:
            print("\n***************************** OUTCOME MODELS ****************************\n")
            print(full_model)
            print("\n-------------------------------------------------------------------------\n")
            for med_model in m_models:
                print(med_model)
                print("\n-------------------------------------------------------------------------\n")
        if self.indirect_model:
            print("\n********************** DIRECT AND INDIRECT EFFECTS **********************\n")
            print(self.direct_model)
            print(self.indirect_model)
        else:
            print("\n********************** CONDITIONAL EFFECTS **********************\n")
            print(self.direct_model)
def test_show_null_counts(self):
    """df.info() shows non-null counts only within max_info_rows/columns.

    NOTE(review): ``null_counts`` is a legacy ``DataFrame.info`` keyword
    (renamed ``show_counts`` in later pandas) -- this example targets an
    old pandas version.
    """
    df = DataFrame(1, columns=range(10), index=range(10))
    df.iloc[1, 1] = np.nan

    def check(null_counts, result):
        buf = StringIO()
        df.info(buf=buf, null_counts=null_counts)
        self.assertTrue(('non-null' in buf.getvalue()) is result)

    with option_context('display.max_info_rows', 20,
                        'display.max_info_columns', 20):
        check(None, True)
        check(True, True)
        check(False, False)

    with option_context('display.max_info_rows', 5,
                        'display.max_info_columns', 5):
        check(None, False)
        check(True, False)
        check(False, False)
def test_repr_truncation(self):
    """Cells longer than display.max_colwidth are elided with '...'."""
    max_len = 20
    with option_context("display.max_colwidth", max_len):
        df = DataFrame({'A': np.random.randn(10),
                        'B': [tm.rands(np.random.randint(max_len - 1,
                                                         max_len + 1))
                              for i in range(10)]})
        r = repr(df)
        r = r[r.find('\n') + 1:]
        adj = fmt._get_adjustment()
        for line, value in lzip(r.split('\n'), df['B']):
            if adj.len(value) + 1 > max_len:
                self.assertIn('...', line)
            else:
                self.assertNotIn('...', line)

    with option_context("display.max_colwidth", 999999):
        self.assertNotIn('...', repr(df))

    with option_context("display.max_colwidth", max_len + 2):
        self.assertNotIn('...', repr(df))
def test_expand_frame_repr(self):
    """display.expand_frame_repr toggles wrapped repr for wide frames."""
    df_small = DataFrame('hello', [0], [0])
    df_wide = DataFrame('hello', [0], lrange(10))
    df_tall = DataFrame('hello', lrange(30), lrange(5))

    with option_context('mode.sim_interactive', True):
        with option_context('display.max_columns', 10, 'display.width', 20,
                            'display.max_rows', 20,
                            'display.show_dimensions', True):
            with option_context('display.expand_frame_repr', True):
                self.assertFalse(has_truncated_repr(df_small))
                self.assertFalse(has_expanded_repr(df_small))
                self.assertFalse(has_truncated_repr(df_wide))
                self.assertTrue(has_expanded_repr(df_wide))
                self.assertTrue(has_vertically_truncated_repr(df_tall))
                self.assertTrue(has_expanded_repr(df_tall))

            with option_context('display.expand_frame_repr', False):
                self.assertFalse(has_truncated_repr(df_small))
                self.assertFalse(has_expanded_repr(df_small))
                self.assertFalse(has_horizontally_truncated_repr(df_wide))
                self.assertFalse(has_expanded_repr(df_wide))
                self.assertTrue(has_vertically_truncated_repr(df_tall))
                self.assertFalse(has_expanded_repr(df_tall))
def test_str_max_colwidth(self):
    # GH 7856
    # NOTE(review): the expected strings below appear to have had their
    # column-alignment whitespace collapsed during extraction -- verify
    # against the upstream pandas test before relying on them.
    df = pd.DataFrame([{'a': 'foo',
                        'b': 'bar',
                        'c': 'uncomfortably long line with lots of stuff',
                        'd': 1},
                       {'a': 'foo',
                        'b': 'bar',
                        'c': 'stuff',
                        'd': 1}])
    df.set_index(['a', 'b', 'c'])
    self.assertTrue(
        str(df) ==
        ' a b c d\n'
        '0 foo bar uncomfortably long line with lots of stuff 1\n'
        '1 foo bar stuff 1')
    with option_context('max_colwidth', 20):
        self.assertTrue(
            str(df) ==
            ' a b c d\n'
            '0 foo bar uncomfortably lo... 1\n'
            '1 foo bar stuff 1')
def test_wide_repr(self):
    """Wide frames get an expanded (wrapped) repr when allowed to."""
    with option_context('mode.sim_interactive', True,
                        'display.show_dimensions', True):
        max_cols = get_option('display.max_columns')
        df = DataFrame(tm.rands_array(25, size=(10, max_cols - 1)))
        set_option('display.expand_frame_repr', False)
        rep_str = repr(df)
        assert "10 rows x %d columns" % (max_cols - 1) in rep_str
        set_option('display.expand_frame_repr', True)
        wide_repr = repr(df)
        self.assertNotEqual(rep_str, wide_repr)

        with option_context('display.width', 120):
            wider_repr = repr(df)
            self.assertTrue(len(wider_repr) < len(wide_repr))

    reset_option('display.expand_frame_repr')
def test_wide_repr_named(self):
    """The index name is repeated on each wrapped chunk of a wide repr."""
    with option_context('mode.sim_interactive', True):
        max_cols = get_option('display.max_columns')
        df = DataFrame(tm.rands_array(25, size=(10, max_cols - 1)))
        df.index.name = 'DataFrame Index'
        set_option('display.expand_frame_repr', False)
        rep_str = repr(df)
        set_option('display.expand_frame_repr', True)
        wide_repr = repr(df)
        self.assertNotEqual(rep_str, wide_repr)

        with option_context('display.width', 150):
            wider_repr = repr(df)
            self.assertTrue(len(wider_repr) < len(wide_repr))

        for line in wide_repr.splitlines()[1::13]:
            self.assertIn('DataFrame Index', line)

    reset_option('display.expand_frame_repr')
def test_wide_repr_multiindex_cols(self):
    """Wide repr behaves with MultiIndex on both axes."""
    with option_context('mode.sim_interactive', True):
        max_cols = get_option('display.max_columns')
        midx = MultiIndex.from_arrays(tm.rands_array(5, size=(2, 10)))
        mcols = MultiIndex.from_arrays(
            tm.rands_array(3, size=(2, max_cols - 1)))
        df = DataFrame(tm.rands_array(25, (10, max_cols - 1)),
                       index=midx, columns=mcols)
        df.index.names = ['Level 0', 'Level 1']
        set_option('display.expand_frame_repr', False)
        rep_str = repr(df)
        set_option('display.expand_frame_repr', True)
        wide_repr = repr(df)
        self.assertNotEqual(rep_str, wide_repr)

    with option_context('display.width', 150):
        wider_repr = repr(df)
        self.assertTrue(len(wider_repr) < len(wide_repr))

    reset_option('display.expand_frame_repr')
def test_show_dimensions(self):
    """display.show_dimensions: True/False/'truncate' control the footer."""
    df = DataFrame(123, lrange(10, 15), lrange(30))

    with option_context('display.max_rows', 10, 'display.max_columns', 40,
                        'display.width', 500,
                        'display.expand_frame_repr', 'info',
                        'display.show_dimensions', True):
        self.assertTrue('5 rows' in str(df))
        self.assertTrue('5 rows' in df._repr_html_())

    with option_context('display.max_rows', 10, 'display.max_columns', 40,
                        'display.width', 500,
                        'display.expand_frame_repr', 'info',
                        'display.show_dimensions', False):
        self.assertFalse('5 rows' in str(df))
        self.assertFalse('5 rows' in df._repr_html_())

    with option_context('display.max_rows', 2, 'display.max_columns', 2,
                        'display.width', 500,
                        'display.expand_frame_repr', 'info',
                        'display.show_dimensions', 'truncate'):
        self.assertTrue('5 rows' in str(df))
        self.assertTrue('5 rows' in df._repr_html_())

    with option_context('display.max_rows', 10, 'display.max_columns', 40,
                        'display.width', 500,
                        'display.expand_frame_repr', 'info',
                        'display.show_dimensions', 'truncate'):
        self.assertFalse('5 rows' in str(df))
        self.assertFalse('5 rows' in df._repr_html_())
def test_info_repr(self):
    """display.large_repr='info' replaces truncated reprs with df.info()."""
    max_rows = get_option('display.max_rows')
    max_cols = get_option('display.max_columns')
    # Long
    h, w = max_rows + 1, max_cols - 1
    df = DataFrame(dict((k, np.arange(1, 1 + h)) for k in np.arange(w)))
    assert has_vertically_truncated_repr(df)
    with option_context('display.large_repr', 'info'):
        assert has_info_repr(df)
    # Wide
    h, w = max_rows - 1, max_cols + 1
    df = DataFrame(dict((k, np.arange(1, 1 + h)) for k in np.arange(w)))
    assert has_horizontally_truncated_repr(df)
    with option_context('display.large_repr', 'info'):
        assert has_info_repr(df)
def test_info_repr_html(self):
    """display.large_repr='info' also affects the HTML repr."""
    max_rows = get_option('display.max_rows')
    max_cols = get_option('display.max_columns')
    # Long
    h, w = max_rows + 1, max_cols - 1
    df = DataFrame(dict((k, np.arange(1, 1 + h)) for k in np.arange(w)))
    assert r'&lt;class' not in df._repr_html_()
    with option_context('display.large_repr', 'info'):
        assert r'&lt;class' in df._repr_html_()
    # Wide
    h, w = max_rows - 1, max_cols + 1
    df = DataFrame(dict((k, np.arange(1, 1 + h)) for k in np.arange(w)))
    assert '&lt;class' not in df._repr_html_()
    with option_context('display.large_repr', 'info'):
        assert '&lt;class' in df._repr_html_()
def test_format_explicit(self):
    """Truncated Series reprs for short/long/asc/desc string values.

    NOTE(review): alignment whitespace inside the expected strings may
    have been collapsed during extraction -- verify upstream.
    """
    test_sers = self.gen_test_series()
    with option_context("display.max_rows", 4):
        res = repr(test_sers['onel'])
        exp = '0 a\n1 a\n ..\n98 a\n99 a\ndtype: object'
        self.assertEqual(exp, res)

        res = repr(test_sers['twol'])
        exp = ('0 ab\n1 ab\n ..\n98 ab\n99 ab\ndtype:'
               ' object')
        self.assertEqual(exp, res)

        res = repr(test_sers['asc'])
        exp = ('0 a\n1 ab\n ... \n4 abcde\n5'
               ' abcdef\ndtype: object')
        self.assertEqual(exp, res)

        res = repr(test_sers['desc'])
        exp = ('5 abcdef\n4 abcde\n ... \n1 ab\n0'
               ' a\ndtype: object')
        self.assertEqual(exp, res)
def df_to_html(df, border=0, classes=('table', 'table-striped', 'table-hover'),
               **kwargs):
    """Convert a dataframe to HTML without truncating contents.

    pandas will truncate cell contents that exceed 50 characters by default.
    Use this function to avoid this truncation behavior.

    This function uses different default parameters than `DataFrame.to_html`
    to give uniform styling to HTML tables that are compatible with
    q2template themes. These parameters can be overridden, and they (along
    with any other parameters) will be passed through to `DataFrame.to_html`.

    Parameters
    ----------
    df : pd.DataFrame
        DataFrame to convert to HTML.
    border : int
        Value of the HTML ``border`` attribute on the table.
    classes : tuple of str
        CSS classes applied to the table element.
    kwargs : dict
        Parameters passed through to `pd.DataFrame.to_html`.

    Returns
    -------
    str
        DataFrame converted to HTML.

    References
    ----------
    .. [1] https://stackoverflow.com/q/26277757/3776794
    .. [2] https://github.com/pandas-dev/pandas/issues/1852
    """
    # BUG FIX: -1 as "no limit" for display.max_colwidth was deprecated in
    # pandas 1.0 and later removed; None is the documented way to disable
    # cell truncation.
    with pd.option_context('display.max_colwidth', None):
        return df.to_html(border=border, classes=classes, **kwargs)
def test_representation(self):
    """repr/str/unicode of DatetimeIndex (naive, tz-aware, NaT) match."""
    idx = []
    idx.append(DatetimeIndex([], freq='D'))
    idx.append(DatetimeIndex(['2011-01-01'], freq='D'))
    idx.append(DatetimeIndex(['2011-01-01', '2011-01-02'], freq='D'))
    idx.append(DatetimeIndex(['2011-01-01', '2011-01-02', '2011-01-03'],
                             freq='D'))
    idx.append(DatetimeIndex(['2011-01-01 09:00', '2011-01-01 10:00',
                              '2011-01-01 11:00'],
                             freq='H', tz='Asia/Tokyo'))
    idx.append(DatetimeIndex(['2011-01-01 09:00', '2011-01-01 10:00',
                              pd.NaT], tz='US/Eastern'))
    idx.append(DatetimeIndex(['2011-01-01 09:00', '2011-01-01 10:00',
                              pd.NaT], tz='UTC'))

    exp = []
    exp.append("""DatetimeIndex([], dtype='datetime64[ns]', freq='D')""")
    exp.append("DatetimeIndex(['2011-01-01'], dtype='datetime64[ns]', "
               "freq='D')")
    exp.append("DatetimeIndex(['2011-01-01', '2011-01-02'], "
               "dtype='datetime64[ns]', freq='D')")
    exp.append("DatetimeIndex(['2011-01-01', '2011-01-02', '2011-01-03'], "
               "dtype='datetime64[ns]', freq='D')")
    exp.append("DatetimeIndex(['2011-01-01 09:00:00+09:00', "
               "'2011-01-01 10:00:00+09:00', '2011-01-01 11:00:00+09:00']"
               ", dtype='datetime64[ns, Asia/Tokyo]', freq='H')")
    exp.append("DatetimeIndex(['2011-01-01 09:00:00-05:00', "
               "'2011-01-01 10:00:00-05:00', 'NaT'], "
               "dtype='datetime64[ns, US/Eastern]', freq=None)")
    exp.append("DatetimeIndex(['2011-01-01 09:00:00+00:00', "
               "'2011-01-01 10:00:00+00:00', 'NaT'], "
               "dtype='datetime64[ns, UTC]', freq=None)")

    with pd.option_context('display.width', 300):
        for index_obj, expected in zip(idx, exp):
            for func in ['__repr__', '__unicode__', '__str__']:
                result = getattr(index_obj, func)()
                self.assertEqual(result, expected)
def test_representation(self):
    """repr/str/unicode of TimedeltaIndex match the expected strings."""
    idx1 = TimedeltaIndex([], freq='D')
    idx2 = TimedeltaIndex(['1 days'], freq='D')
    idx3 = TimedeltaIndex(['1 days', '2 days'], freq='D')
    idx4 = TimedeltaIndex(['1 days', '2 days', '3 days'], freq='D')
    idx5 = TimedeltaIndex(['1 days 00:00:01', '2 days', '3 days'])

    exp1 = """TimedeltaIndex([], dtype='timedelta64[ns]', freq='D')"""
    exp2 = ("TimedeltaIndex(['1 days'], dtype='timedelta64[ns]', "
            "freq='D')")
    exp3 = ("TimedeltaIndex(['1 days', '2 days'], "
            "dtype='timedelta64[ns]', freq='D')")
    exp4 = ("TimedeltaIndex(['1 days', '2 days', '3 days'], "
            "dtype='timedelta64[ns]', freq='D')")
    exp5 = ("TimedeltaIndex(['1 days 00:00:01', '2 days 00:00:00', "
            "'3 days 00:00:00'], dtype='timedelta64[ns]', freq=None)")

    with pd.option_context('display.width', 300):
        for indx, expected in zip([idx1, idx2, idx3, idx4, idx5],
                                  [exp1, exp2, exp3, exp4, exp5]):
            for func in ['__repr__', '__unicode__', '__str__']:
                result = getattr(indx, func)()
                self.assertEqual(result, expected)
def test_representation_to_series(self):
    """Series reprs built from TimedeltaIndex.

    NOTE(review): the triple-quoted expected strings below were originally
    multi-line; their newlines and alignment appear collapsed by
    extraction -- verify against the upstream pandas test.
    """
    idx1 = TimedeltaIndex([], freq='D')
    idx2 = TimedeltaIndex(['1 days'], freq='D')
    idx3 = TimedeltaIndex(['1 days', '2 days'], freq='D')
    idx4 = TimedeltaIndex(['1 days', '2 days', '3 days'], freq='D')
    idx5 = TimedeltaIndex(['1 days 00:00:01', '2 days', '3 days'])

    exp1 = """Series([], dtype: timedelta64[ns])"""
    exp2 = """0 1 days dtype: timedelta64[ns]"""
    exp3 = """0 1 days 1 2 days dtype: timedelta64[ns]"""
    exp4 = """0 1 days 1 2 days 2 3 days dtype: timedelta64[ns]"""
    exp5 = """0 1 days 00:00:01 1 2 days 00:00:00 2 3 days 00:00:00 dtype: timedelta64[ns]"""

    with pd.option_context('display.width', 300):
        for indx, expected in zip([idx1, idx2, idx3, idx4, idx5],
                                  [exp1, exp2, exp3, exp4, exp5]):
            result = repr(pd.Series(indx))
            self.assertEqual(result, expected)
def test_repr_max_seq_item_setting(self): # GH10182 idx = self.create_index() idx = idx.repeat(50) with pd.option_context("display.max_seq_items", None): repr(idx) self.assertFalse('...' in str(idx))
def test_isnull_for_inf(self):
    """With mode.use_inf_as_null, inf counts as missing for isnull/dropna.

    NOTE(review): ``mode.use_inf_as_null`` is a legacy option removed from
    modern pandas -- this example targets an old version.
    """
    s = Series(['a', np.inf, np.nan, 1.0])
    with pd.option_context('mode.use_inf_as_null', True):
        r = s.isnull()
        dr = s.dropna()
    e = Series([False, True, True, False])
    de = Series(['a', 1.0], index=[0, 3])
    tm.assert_series_equal(r, e)
    tm.assert_series_equal(dr, de)
def test_repr_chop_threshold(self):
    """Values below display.chop_threshold render as zero.

    NOTE(review): alignment whitespace in the expected reprs may have been
    collapsed during extraction.
    """
    df = DataFrame([[0.1, 0.5], [0.5, -0.1]])
    pd.reset_option("display.chop_threshold")  # default None
    self.assertEqual(repr(df), ' 0 1\n0 0.1 0.5\n1 0.5 -0.1')

    with option_context("display.chop_threshold", 0.2):
        self.assertEqual(repr(df), ' 0 1\n0 0.0 0.5\n1 0.5 0.0')

    with option_context("display.chop_threshold", 0.6):
        self.assertEqual(repr(df), ' 0 1\n0 0.0 0.0\n1 0.0 0.0')

    with option_context("display.chop_threshold", None):
        self.assertEqual(repr(df), ' 0 1\n0 0.1 0.5\n1 0.5 -0.1')
def test_repr_obeys_max_seq_limit(self):
    """pprint_thing output length tracks display.max_seq_items."""
    with option_context("display.max_seq_items", 2000):
        self.assertTrue(len(com.pprint_thing(lrange(1000))) > 1000)

    with option_context("display.max_seq_items", 5):
        self.assertTrue(len(com.pprint_thing(lrange(1000))) < 100)
def test_repr_non_interactive(self):
    # in non interactive mode, there can be no dependency on the
    # result of terminal auto size detection
    df = DataFrame('hello', lrange(1000), lrange(5))
    with option_context('mode.sim_interactive', False, 'display.width', 0,
                        'display.height', 0, 'display.max_rows', 5000):
        self.assertFalse(has_truncated_repr(df))
        self.assertFalse(has_expanded_repr(df))
def test_auto_detect(self):
    """max_rows/max_columns of 0 auto-detect from the terminal size."""
    term_width, term_height = get_terminal_size()
    fac = 1.05  # arbitrary large factor to exceed terminal width
    cols = range(int(term_width * fac))
    index = range(10)
    df = DataFrame(index=index, columns=cols)
    with option_context('mode.sim_interactive', True):
        with option_context('max_rows', None):
            with option_context('max_columns', None):
                # Wrap around with None
                self.assertTrue(has_expanded_repr(df))
        with option_context('max_rows', 0):
            with option_context('max_columns', 0):
                # Truncate with auto detection.
                self.assertTrue(has_horizontally_truncated_repr(df))

        index = range(int(term_height * fac))
        df = DataFrame(index=index, columns=cols)
        with option_context('max_rows', 0):
            with option_context('max_columns', None):
                # Wrap around with None
                self.assertTrue(has_expanded_repr(df))
                # Truncate vertically
                self.assertTrue(has_vertically_truncated_repr(df))

        with option_context('max_rows', None):
            with option_context('max_columns', 0):
                self.assertTrue(has_horizontally_truncated_repr(df))
def test_to_string_truncate_indices(self):
    """Truncation kicks in exactly when height/width exceed the limits."""
    for index in [tm.makeStringIndex, tm.makeUnicodeIndex, tm.makeIntIndex,
                  tm.makeDateIndex, tm.makePeriodIndex]:
        for column in [tm.makeStringIndex]:
            for h in [10, 20]:
                for w in [10, 20]:
                    with option_context("display.expand_frame_repr",
                                        False):
                        df = DataFrame(index=index(h), columns=column(w))
                        with option_context("display.max_rows", 15):
                            if h == 20:
                                self.assertTrue(
                                    has_vertically_truncated_repr(df))
                            else:
                                self.assertFalse(
                                    has_vertically_truncated_repr(df))
                        with option_context("display.max_columns", 15):
                            if w == 20:
                                self.assertTrue(
                                    has_horizontally_truncated_repr(df))
                            else:
                                self.assertFalse(
                                    has_horizontally_truncated_repr(df))
                        with option_context("display.max_rows", 15,
                                            "display.max_columns", 15):
                            if h == 20 and w == 20:
                                self.assertTrue(
                                    has_doubly_truncated_repr(df))
                            else:
                                self.assertFalse(
                                    has_doubly_truncated_repr(df))
def test_to_string_truncate_multilevel(self):
    """A MultiIndex frame truncates on both axes when limits are small."""
    arrays = [['bar', 'bar', 'baz', 'baz', 'foo', 'foo', 'qux', 'qux'],
              ['one', 'two', 'one', 'two', 'one', 'two', 'one', 'two']]
    df = DataFrame(index=arrays, columns=arrays)
    with option_context("display.max_rows", 7, "display.max_columns", 7):
        self.assertTrue(has_doubly_truncated_repr(df))
def test_wide_repr_wide_columns(self):
    """Very wide column labels wrap the repr onto a fixed number of lines."""
    with option_context('mode.sim_interactive', True):
        df = DataFrame(randn(5, 3), columns=['a' * 90, 'b' * 90, 'c' * 90])
        rep_str = repr(df)
        self.assertEqual(len(rep_str.splitlines()), 20)
def test_wide_repr_unicode(self):
    """Wide repr with unicode data behaves like the ASCII case."""
    with option_context('mode.sim_interactive', True):
        max_cols = get_option('display.max_columns')
        df = DataFrame(tm.rands_array(25, size=(10, max_cols - 1)))
        set_option('display.expand_frame_repr', False)
        rep_str = repr(df)
        set_option('display.expand_frame_repr', True)
        wide_repr = repr(df)
        self.assertNotEqual(rep_str, wide_repr)

        with option_context('display.width', 150):
            wider_repr = repr(df)
            self.assertTrue(len(wider_repr) < len(wide_repr))

    reset_option('display.expand_frame_repr')
def test_wide_repr_wide_long_columns(self): with option_context('mode.sim_interactive', True): df = DataFrame({'a': ['a' * 30, 'b' * 30], 'b': ['c' * 70, 'd' * 80]}) result = repr(df) self.assertTrue('ccccc' in result) self.assertTrue('ddddd' in result)
def test_max_multi_index_display(self):
    # GH 7101
    # doc example (indexing.rst)

    # multi-index
    arrays = [['bar', 'bar', 'baz', 'baz', 'foo', 'foo', 'qux', 'qux'],
              ['one', 'two', 'one', 'two', 'one', 'two', 'one', 'two']]
    tuples = list(zip(*arrays))
    index = MultiIndex.from_tuples(tuples, names=['first', 'second'])
    s = Series(randn(8), index=index)

    with option_context("display.max_rows", 10):
        self.assertEqual(len(str(s).split('\n')), 10)
    with option_context("display.max_rows", 3):
        self.assertEqual(len(str(s).split('\n')), 5)
    with option_context("display.max_rows", 2):
        self.assertEqual(len(str(s).split('\n')), 5)
    with option_context("display.max_rows", 1):
        self.assertEqual(len(str(s).split('\n')), 4)
    with option_context("display.max_rows", 0):
        self.assertEqual(len(str(s).split('\n')), 10)

    # index
    s = Series(randn(8), None)

    with option_context("display.max_rows", 10):
        self.assertEqual(len(str(s).split('\n')), 9)
    with option_context("display.max_rows", 3):
        self.assertEqual(len(str(s).split('\n')), 4)
    with option_context("display.max_rows", 2):
        self.assertEqual(len(str(s).split('\n')), 4)
    with option_context("display.max_rows", 1):
        self.assertEqual(len(str(s).split('\n')), 3)
    with option_context("display.max_rows", 0):
        self.assertEqual(len(str(s).split('\n')), 9)

    # Make sure #8532 is fixed
def test_consistent_format(self):
    """Truncated Series repr uses a consistent float format throughout.

    NOTE(review): alignment whitespace in the expected repr may have been
    collapsed during extraction.
    """
    s = pd.Series([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0.9999, 1, 1] * 10)
    with option_context("display.max_rows", 10):
        res = repr(s)
        exp = ('0 1.0000\n1 1.0000\n2 1.0000\n3 '
               '1.0000\n4 1.0000\n ... \n125 '
               '1.0000\n126 1.0000\n127 0.9999\n128 '
               '1.0000\n129 1.0000\ndtype: float64')
        self.assertEqual(res, exp)
def chck_ncols(self, s):
    """Assert every (non-ellipsis) line of the truncated repr of *s* has
    the same stripped width."""
    with option_context("display.max_rows", 10):
        res = repr(s)
        lines = res.split('\n')
        # drop the '...' continuation line and the trailing dtype line
        lines = [line for line in repr(s).split('\n')
                 if not re.match('[^\.]*\.+', line)][:-1]
        ncolsizes = len(set(len(line.strip()) for line in lines))
        self.assertEqual(ncolsizes, 1)
def test_truncate_ndots(self):
    """The truncation marker width follows the widest value column."""
    def getndots(s):
        # length of the first run of dots in the flattened repr
        return len(re.match('[^\.]*(\.*)', s).groups()[0])

    s = Series([0, 2, 3, 6])
    with option_context("display.max_rows", 2):
        strrepr = repr(s).replace('\n', '')
        self.assertEqual(getndots(strrepr), 2)

    s = Series([0, 100, 200, 400])
    with option_context("display.max_rows", 2):
        strrepr = repr(s).replace('\n', '')
        self.assertEqual(getndots(strrepr), 3)
def test_output_significant_digits(self):
    # Issue #9764
    # In case default display precision changes:
    # NOTE(review): alignment whitespace in the expected strings may have
    # been collapsed during extraction.
    with pd.option_context('display.precision', 6):
        # DataFrame example from issue #9764
        d = pd.DataFrame(
            {'col1': [9.999e-8, 1e-7, 1.0001e-7, 2e-7, 4.999e-7, 5e-7,
                      5.0001e-7, 6e-7, 9.999e-7, 1e-6, 1.0001e-6, 2e-6,
                      4.999e-6, 5e-6, 5.0001e-6, 6e-6]})

        expected_output = {
            (0, 6): ' col1\n0 9.999000e-08\n1 1.000000e-07\n2 1.000100e-07\n3 2.000000e-07\n4 4.999000e-07\n5 5.000000e-07',
            (1, 6): ' col1\n1 1.000000e-07\n2 1.000100e-07\n3 2.000000e-07\n4 4.999000e-07\n5 5.000000e-07',
            (1, 8): ' col1\n1 1.000000e-07\n2 1.000100e-07\n3 2.000000e-07\n4 4.999000e-07\n5 5.000000e-07\n6 5.000100e-07\n7 6.000000e-07',
            (8, 16): ' col1\n8 9.999000e-07\n9 1.000000e-06\n10 1.000100e-06\n11 2.000000e-06\n12 4.999000e-06\n13 5.000000e-06\n14 5.000100e-06\n15 6.000000e-06',
            (9, 16): ' col1\n9 0.000001\n10 0.000001\n11 0.000002\n12 0.000005\n13 0.000005\n14 0.000005\n15 0.000006'
        }

        for (start, stop), v in expected_output.items():
            self.assertEqual(str(d[start:stop]), v)
def test_too_long(self):
    # GH 10451
    # NOTE(review): alignment whitespace in the expected strings may have
    # been collapsed during extraction.
    with pd.option_context('display.precision', 4):
        # need both a number > 1e6 and something that normally formats to
        # having length > display.precision + 6
        df = pd.DataFrame(dict(x=[12345.6789]))
        self.assertEqual(str(df), ' x\n0 12345.6789')

        df = pd.DataFrame(dict(x=[2e6]))
        self.assertEqual(str(df), ' x\n0 2000000.0')

        df = pd.DataFrame(dict(x=[12345.6789, 2e6]))
        self.assertEqual(str(df), ' x\n0 1.2346e+04\n1 2.0000e+06')
def test_precision(self):
    """Styler picks up display.precision, the constructor arg, and
    set_precision (which mutates and returns the same object)."""
    with pd.option_context('display.precision', 10):
        s = Styler(self.df)
        self.assertEqual(s.precision, 10)

    s = Styler(self.df, precision=2)
    self.assertEqual(s.precision, 2)

    s2 = s.set_precision(4)
    self.assertTrue(s is s2)
    self.assertEqual(s.precision, 4)
def _main(log_path, show_browser=False):
    """Render a processed log file as an HTML report next to the log.

    NOTE(review): this function was split across two lines in the scraped
    source; the reconstruction below rejoins them.
    """
    print(log_path)
    df, metadata = process_log(log_path)
    del df['Timestamp']
    df['Msg Type'] = df['Msg Type'].apply(escape_html_chars)
    df['Message'] = df['Message'].apply(escape_html_chars)
    # df['Message'] = df['Message'].apply(try_json)
    df['Message'] = df.apply(
        lambda row: format_error(row['Msg Type'], row['Message']), 1)
    df['Rev ID'] = df['Rev ID'].apply(
        lambda x: '<a href="https://www.wikidata.org/w/index.php?oldid={}&diff=prev">{}</a>'.format(x,x) if x else x)

    level_counts, info_counts, warning_counts, error_counts = generate_summary(df)

    warnings_df = df.query("Level == 'WARNING'")
    warnings_df.is_copy = False
    del warnings_df['Level']
    if not warnings_df.empty:
        warnings_df = gen_ext_id_links(warnings_df)
        warnings_df = url_qid(warnings_df, "QID")

    errors_df = df.query("Level == 'ERROR'")
    errors_df.is_copy = False
    del errors_df['Level']
    if not errors_df.empty:
        errors_df = gen_ext_id_links(errors_df)
        errors_df = url_qid(errors_df, "QID")
        # errors_df['Message'] = errors_df['Message'].apply(try_format_error)

    info_df = df.query("Level == 'INFO'")
    info_df.is_copy = False
    del info_df['Level']
    if not info_df.empty:
        info_df = gen_ext_id_links(info_df)
        info_df = url_qid(info_df, "QID")
        info_df.Message = info_df.Message.str.replace("SKIP", "No Action")

    with pd.option_context('display.max_colwidth', -1):
        # this class nonsense is an ugly hack:
        # https://stackoverflow.com/questions/15079118/js-datatables-from-pandas/41536906
        level_counts = level_counts.to_frame().to_html(escape=False)
        info_counts = info_counts.to_frame().to_html(escape=False)
        warning_counts = warning_counts.to_frame().to_html(escape=False)
        error_counts = error_counts.to_frame().to_html(escape=False)
        info_df = info_df.to_html(escape=False, classes='df" id = "info_df')
        warnings_df = warnings_df.to_html(escape=False,
                                          classes='df" id = "warning_df')
        errors_df = errors_df.to_html(escape=False,
                                      classes='df" id = "error_df')

    template = Template(
        open(os.path.join(sys.path[0], "template.html")).read())
    s = template.render(name=metadata['name'], run_id=metadata['run_id'],
                        level_counts=level_counts, info_counts=info_counts,
                        warning_counts=warning_counts,
                        error_counts=error_counts,
                        warnings_df=warnings_df, errors_df=errors_df,
                        info_df=info_df)
    out_path = log_path.rsplit(".", 1)[0] + ".html"
    with open(out_path, 'w') as f:
        f.write(s)
    if show_browser:
        webbrowser.open(out_path)
def main():
    """CLI: print the last row of each HDF5 training log and plot fields."""
    parser = argparse.ArgumentParser()
    parser.add_argument('logfiles', type=str, nargs='+')
    parser.add_argument('--fields', type=str,
                        default='ret,avglen,ent,kl,vf_r2,ttotal')
    parser.add_argument('--noplot', action='store_true')
    parser.add_argument('--plotfile', type=str, default=None)
    parser.add_argument('--range_end', type=int, default=None)
    args = parser.parse_args()
    assert len(set(args.logfiles)) == len(args.logfiles), 'Log files must be unique'

    fields = args.fields.split(',')

    # Load logs from all files
    fname2log = {}
    for fname in args.logfiles:
        if ':' in fname:
            os.system('rsync -avrz {} /tmp'.format(fname))
            fname = os.path.join('/tmp', os.path.basename(fname))
        with pd.HDFStore(fname, 'r') as f:
            assert fname not in fname2log
            df = f['log']
            df.set_index('iter', inplace=True)
            fname2log[fname] = df.loc[:args.range_end, fields]

    # Print
    if not args.noplot or args.plotfile is not None:
        import matplotlib
        if args.plotfile is not None:
            matplotlib.use('Agg')
        import matplotlib.pyplot as plt
        plt.style.use('seaborn-colorblind')

    ax = None
    for fname, df in fname2log.items():
        with pd.option_context('display.max_rows', 9999):
            print(fname)
            print(df[-1:])

        if 'vf_r2' in df.keys():
            df['vf_r2'] = np.maximum(0, df['vf_r2'])
        if not args.noplot:
            if ax is None:
                ax = df.plot(subplots=True, title=','.join(args.logfiles))
            else:
                df.plot(subplots=True, title=','.join(args.logfiles),
                        ax=ax, legend=False)
    if args.plotfile is not None:
        plt.savefig(args.plotfile, transparent=True, bbox_inches='tight',
                    dpi=300)
    elif not args.noplot:
        plt.show()
def transform(self, X, y=None):
    """Encode the Titanic categorical features as integers.

    Maps ``Embarked`` (S/C/Q -> 0/1/2) and ``Sex`` (female/male -> 0/1),
    derives an aggregated integer ``Title`` from ``Name``, and drops the
    ``Name`` column.

    :param X: DataFrame with 'Embarked', 'Sex' and 'Name' columns.
    :param y: ignored (sklearn transformer API compatibility).
    :return: transformed DataFrame (same object as X, minus 'Name').
    """
    # Suppress SettingWithCopyWarning (alternatively: add a X = X.copy())
    with pd.option_context('mode.chained_assignment', None):
        # --- Convert Embarked
        mapping = {'S': 0,
                   'C': 1,
                   'Q': 2,
                   }
        X.loc[:, 'Embarked'] = X.loc[:, 'Embarked'].replace(mapping,
                                                            inplace=False)
        # --- Convert Sex
        mapping = {'female': 0,
                   'male': 1
                   }
        X.loc[:, 'Sex'] = X['Sex'].replace(mapping, inplace=False)
        # --- Convert Name to Title
        X.loc[:, 'Title'] = X['Name'].map(
            lambda name: name.split(',')[1].split('.')[0].strip())
        # a map of more aggregated titles
        mapping = {
            "Capt": 0,          # Officer
            "Col": 0,           # Officer
            "Major": 0,         # Officer
            "Jonkheer": 1,      # Royalty
            "Don": 1,           # Royalty
            "Sir": 1,           # Royalty
            "Dr": 0,            # Officer
            "Rev": 0,           # Officer
            "the Countess": 1,  # Royalty
            "Dona": 1,          # Royalty
            "Mme": 2,           # "Mrs"
            "Mlle": 3,          # "Miss"
            "Ms": 2,            # "Mrs"
            "Mr": 4,            # "Mr"
            "Mrs": 2,           # "Mrs"
            "Miss": 3,          # "Miss"
            "Master": 5,        # "Master"
            "Lady": 1           # "Royalty"
        }
        X.loc[:, 'Title'] = X['Title'].map(mapping)
        # BUG FIX: the positional `axis` argument to DataFrame.drop
        # (X.drop('Name', 1)) was removed in pandas 2.0 -- use the
        # explicit keyword form instead.
        X = X.drop(columns='Name')
    return X
def test_representation_to_series(self):
    """Series reprs built from DatetimeIndex (naive, tz-aware, NaT).

    NOTE(review): the triple-quoted expected strings below were originally
    multi-line; their newlines and alignment appear collapsed by
    extraction -- verify against the upstream pandas test.
    """
    idx1 = DatetimeIndex([], freq='D')
    idx2 = DatetimeIndex(['2011-01-01'], freq='D')
    idx3 = DatetimeIndex(['2011-01-01', '2011-01-02'], freq='D')
    idx4 = DatetimeIndex(['2011-01-01', '2011-01-02', '2011-01-03'],
                         freq='D')
    idx5 = DatetimeIndex(['2011-01-01 09:00', '2011-01-01 10:00',
                          '2011-01-01 11:00'], freq='H', tz='Asia/Tokyo')
    idx6 = DatetimeIndex(['2011-01-01 09:00', '2011-01-01 10:00', pd.NaT],
                         tz='US/Eastern')
    idx7 = DatetimeIndex(['2011-01-01 09:00', '2011-01-02 10:15'])

    exp1 = """Series([], dtype: datetime64[ns])"""
    exp2 = """0 2011-01-01 dtype: datetime64[ns]"""
    exp3 = """0 2011-01-01 1 2011-01-02 dtype: datetime64[ns]"""
    exp4 = """0 2011-01-01 1 2011-01-02 2 2011-01-03 dtype: datetime64[ns]"""
    exp5 = """0 2011-01-01 09:00:00+09:00 1 2011-01-01 10:00:00+09:00 2 2011-01-01 11:00:00+09:00 dtype: datetime64[ns, Asia/Tokyo]"""
    exp6 = """0 2011-01-01 09:00:00-05:00 1 2011-01-01 10:00:00-05:00 2 NaT dtype: datetime64[ns, US/Eastern]"""
    exp7 = """0 2011-01-01 09:00:00 1 2011-01-02 10:15:00 dtype: datetime64[ns]"""

    with pd.option_context('display.width', 300):
        for idx, expected in zip([idx1, idx2, idx3, idx4,
                                  idx5, idx6, idx7],
                                 [exp1, exp2, exp3, exp4,
                                  exp5, exp6, exp7]):
            result = repr(Series(idx))
            self.assertEqual(result, expected)
def test_repr_max_columns_max_rows(self):
    """Interplay of max_rows/max_columns with terminal width detection."""
    term_width, term_height = get_terminal_size()
    if term_width < 10 or term_height < 10:
        raise nose.SkipTest("terminal size too small, "
                            "{0} x {1}".format(term_width, term_height))

    def mkframe(n):
        index = ['%05d' % i for i in range(n)]
        return DataFrame(0, index, index)

    df6 = mkframe(6)
    df10 = mkframe(10)
    with option_context('mode.sim_interactive', True):
        with option_context('display.width', term_width * 2):
            with option_context('display.max_rows', 5,
                                'display.max_columns', 5):
                self.assertFalse(has_expanded_repr(mkframe(4)))
                self.assertFalse(has_expanded_repr(mkframe(5)))
                self.assertFalse(has_expanded_repr(df6))
                self.assertTrue(has_doubly_truncated_repr(df6))

            with option_context('display.max_rows', 20,
                                'display.max_columns', 10):
                # Out of max_columns boundary, but no extending
                # since not exceeding width
                self.assertFalse(has_expanded_repr(df6))
                self.assertFalse(has_truncated_repr(df6))

            with option_context('display.max_rows', 9,
                                'display.max_columns', 10):
                # out of vertical bounds can not result in expanded repr
                self.assertFalse(has_expanded_repr(df10))
                self.assertTrue(has_vertically_truncated_repr(df10))

        # width=None in terminal, auto detection
        with option_context('display.max_columns', 100,
                            'display.max_rows', term_width * 20,
                            'display.width', None):
            df = mkframe((term_width // 7) - 2)
            self.assertFalse(has_expanded_repr(df))
            df = mkframe((term_width // 7) + 2)
            com.pprint_thing(df._repr_fits_horizontal_())
            self.assertTrue(has_expanded_repr(df))
def to_clipboard(obj, excel=None, sep=None, **kwargs):  # pragma: no cover
    """
    Attempt to write text representation of object to the system clipboard
    The clipboard can be then pasted into Excel for example.

    Parameters
    ----------
    obj : the object to write to the clipboard
    excel : boolean, defaults to True
            if True, use the provided separator, writing in a csv
            format for allowing easy pasting into excel.
            if False, write a string representation of the object
            to the clipboard
    sep : optional, defaults to tab
    other keywords are passed to to_csv

    Notes
    -----
    Requirements for your platform
      - Linux: xclip, or xsel (with gtk or PyQt4 modules)
      - Windows:
      - OS X:
    """
    from pandas.util.clipboard import clipboard_set
    if excel is None:
        excel = True
    if excel:
        try:
            if sep is None:
                sep = '\t'
            buf = StringIO()
            obj.to_csv(buf, sep=sep, **kwargs)
            clipboard_set(buf.getvalue())
            return
        # BUG FIX: the original bare `except:` also swallowed SystemExit
        # and KeyboardInterrupt; restrict to Exception while keeping the
        # deliberate best-effort fallback to the plain-text path below.
        except Exception:
            pass
    if isinstance(obj, DataFrame):
        # str(df) has various unhelpful defaults, like truncation
        with option_context('display.max_colwidth', 999999):
            objstr = obj.to_string(**kwargs)
    else:
        objstr = str(obj)
    clipboard_set(objstr)
def main():
    """CLI: print the last row of each HDF5 training log and plot fields.

    BUG FIX: the original used Python-2 `print` statements
    (``print fname`` / ``print df[-1:]``), which are syntax errors under
    Python 3 -- converted to `print()` calls. All other behavior is kept.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('logfiles', type=str, nargs='+')
    parser.add_argument('--fields', type=str,
                        default='trueret,avglen,ent,kl,vf_r2,vf_kl,tdvf_r2,rloss,racc')
    parser.add_argument('--noplot', action='store_true')
    parser.add_argument('--plotfile', type=str, default=None)
    parser.add_argument('--range_end', type=int, default=None)
    args = parser.parse_args()
    assert len(set(args.logfiles)) == len(args.logfiles), 'Log files must be unique'

    fields = args.fields.split(',')

    # Load logs from all files
    fname2log = {}
    for fname in args.logfiles:
        with pd.HDFStore(fname, 'r') as f:
            assert fname not in fname2log
            df = f['log']
            df.set_index('iter', inplace=True)
            fname2log[fname] = df.loc[:args.range_end, fields]

    # Print stuff
    if not args.noplot or args.plotfile is not None:
        import matplotlib
        if args.plotfile is not None:
            matplotlib.use('Agg')
        import matplotlib.pyplot as plt
        plt.style.use('ggplot')

    ax = None
    for fname, df in fname2log.items():
        with pd.option_context('display.max_rows', 9999):
            print(fname)
            print(df[-1:])

        df['vf_r2'] = np.maximum(0, df['vf_r2'])
        if ax is None:
            ax = df.plot(subplots=True, title=fname)
        else:
            df.plot(subplots=True, title=fname, ax=ax, legend=False)
    if not args.noplot:
        plt.show()
    if args.plotfile is not None:
        plt.savefig(args.plotfile, bbox_inches='tight', dpi=200)
def _print_df_scores(df_scores, score_types, indent=''):
    """Pretty print the scores dataframe.

    Parameters
    ----------
    df_scores : pd.DataFrame
        the score dataframe
    score_types : list of score types
        a list of score types to use
    indent : str, default=''
        indentation if needed
    """
    try:
        # try to re-order columns/rows in the printed array
        # we may not have all train, valid, test, so need to select
        index_order = np.array(['train', 'valid', 'test'])
        ordered_index = index_order[np.isin(index_order, df_scores.index)]
        df_scores = df_scores.loc[
            ordered_index, [score_type.name for score_type in score_types]]
    except Exception:
        _print_warning("Couldn't re-order the score matrix..")
    with pd.option_context("display.width", 160):
        df_repr = repr(df_scores)
        df_repr_out = []
        for line, color_key in zip(
                df_repr.splitlines(),
                [None, None] + list(df_scores.index.values)):
            if line.strip() == 'step':
                continue
            if color_key is None:
                # table header
                line = stylize(line, fg(fg_colors['title']) + attr('bold'))
            if color_key is not None:
                tokens = line.split()
                tokens_bak = tokens[:]
                if 'official_' + color_key in fg_colors:
                    # line label and official score bold & bright
                    label_color = fg(fg_colors['official_' + color_key])
                    tokens[0] = stylize(tokens[0], label_color + attr('bold'))
                    tokens[1] = stylize(tokens[1], label_color + attr('bold'))
                if color_key in fg_colors:
                    # other scores pale
                    tokens[2:] = [stylize(token, fg(fg_colors[color_key]))
                                  for token in tokens[2:]]
                for token_from, token_to in zip(tokens_bak, tokens):
                    line = line.replace(token_from, token_to)
            line = indent + line
            df_repr_out.append(line)
        print('\n'.join(df_repr_out))