我们从Python开源项目中,提取了以下23个代码示例,用于说明如何使用pandas.IndexSlice()。
def calc_shared(self, label):
    """
    Get the subset of scores that are shared across all Selections in each
    Condition.

    Reads ``/main/<label>/scores_shared_full`` from the store, keeps every
    row for which at least one top-level column group has no null value,
    and writes the result to ``/main/<label>/scores_shared``.  Does nothing
    when ``check_store`` reports that the output already exists.
    """
    shared_key = "/main/{}/scores_shared".format(label)
    if self.check_store(shared_key):
        return

    idx = pd.IndexSlice
    logging.info(
        "Identifying subset shared across all Selections ({})".format(label),
        extra={'oname': self.name})

    full_scores = self.store.select(
        "/main/{}/scores_shared_full".format(label))

    # a row survives as soon as one top-level column group is fully populated
    keep = np.full(len(full_scores.index), False, dtype=bool)
    for condition in full_scores.columns.levels[0]:
        condition_block = full_scores.loc[:, idx[condition, :, :]]
        fully_populated = condition_block.notnull().all(axis='columns')
        keep = np.logical_or(keep, fully_populated)

    self.store.put(shared_key, full_scores.loc[keep], format="table")
def applymap(self, func, subset=None, **kwargs):
    """
    Queue an elementwise function whose result updates the HTML
    representation.

    The call is only recorded here: an entry is appended to ``self._todo``
    holding a getter for the ``_applymap`` attribute together with the
    positional and keyword arguments to use later.

    .. versionadded:: 0.17.1

    Parameters
    ----------
    func : function
    subset : IndexSlice
        a valid indexer to limit ``data`` to *before* applying the
        function. Consider using a pandas.IndexSlice
    kwargs : dict
        pass along to ``func``

    Returns
    -------
    self : Styler
    """
    def resolve(instance):
        # looked up lazily so the attribute is fetched from whichever
        # instance eventually replays the queue
        return getattr(instance, '_applymap')

    pending = (resolve, (func, subset), kwargs)
    self._todo.append(pending)
    return self
def highlight_max(self, subset=None, color='yellow', axis=0):
    """
    Highlight the maximum by shading the background

    Delegates to ``self._highlight_handler`` with ``max_=True``.

    .. versionadded:: 0.17.1

    Parameters
    ----------
    subset: IndexSlice, default None
        a valid slice for ``data`` to limit the style application to
    color: str, default 'yellow'
    axis: int, str, or None; default 0
        0 or 'index' for columnwise (the default), 1 or 'columns' for
        rowwise, or ``None`` for tablewise

    Returns
    -------
    self : Styler
        (whatever ``_highlight_handler`` returns)
    """
    options = dict(subset=subset, color=color, axis=axis, max_=True)
    return self._highlight_handler(**options)
def highlight_min(self, subset=None, color='yellow', axis=0):
    """
    Highlight the minimum by shading the background

    Delegates to ``self._highlight_handler`` with ``max_=False``.

    .. versionadded:: 0.17.1

    Parameters
    ----------
    subset: IndexSlice, default None
        a valid slice for ``data`` to limit the style application to
    color: str, default 'yellow'
    axis: int, str, or None; default 0
        0 or 'index' for columnwise (the default), 1 or 'columns' for
        rowwise, or ``None`` for tablewise

    Returns
    -------
    self : Styler
        (whatever ``_highlight_handler`` returns)
    """
    options = dict(subset=subset, color=color, axis=axis, max_=False)
    return self._highlight_handler(**options)
def test_slice_with_negative_step(self):
    """
    Label slices with a negative step on a monthly PeriodIndex must select
    the same rows as the equivalent positional slices.

    Every case is checked through plain ``[]`` and ``.loc``; the legacy
    ``.ix`` indexer is checked only on pandas versions that still provide
    it (it was removed in pandas 1.0).
    """
    ts = Series(np.arange(20), period_range('2014-01', periods=20, freq='M'))
    SLC = pd.IndexSlice

    def assert_slices_equivalent(l_slc, i_slc):
        expected = ts.iloc[i_slc]
        assert_series_equal(ts[l_slc], expected)
        assert_series_equal(ts.loc[l_slc], expected)
        if hasattr(ts, 'ix'):
            assert_series_equal(ts.ix[l_slc], expected)

    # (label-based slice, positional slice expected to match it)
    cases = [
        (SLC[Period('2014-10')::-1], SLC[9::-1]),
        (SLC['2014-10'::-1], SLC[9::-1]),

        (SLC[:Period('2014-10'):-1], SLC[:8:-1]),
        (SLC[:'2014-10':-1], SLC[:8:-1]),

        (SLC['2015-02':'2014-10':-1], SLC[13:8:-1]),
        (SLC[Period('2015-02'):Period('2014-10'):-1], SLC[13:8:-1]),
        (SLC['2015-02':Period('2014-10'):-1], SLC[13:8:-1]),
        (SLC[Period('2015-02'):'2014-10':-1], SLC[13:8:-1]),

        # start before stop with a negative step selects nothing
        (SLC['2014-10':'2015-02':-1], SLC[:0]),
    ]
    for label_slice, positional_slice in cases:
        assert_slices_equivalent(label_slice, positional_slice)
def test_slice_with_negative_step(self):
    """
    Label slices with a negative step on an hourly TimedeltaIndex must
    select the same rows as the equivalent positional slices.

    Every case is checked through plain ``[]`` and ``.loc``; the legacy
    ``.ix`` indexer is checked only on pandas versions that still provide
    it (it was removed in pandas 1.0).
    """
    # 'h' rather than 'H': the upper-case hourly alias is deprecated in
    # recent pandas
    ts = Series(np.arange(20), timedelta_range('0', periods=20, freq='h'))
    SLC = pd.IndexSlice

    def assert_slices_equivalent(l_slc, i_slc):
        expected = ts.iloc[i_slc]
        assert_series_equal(ts[l_slc], expected)
        assert_series_equal(ts.loc[l_slc], expected)
        if hasattr(ts, 'ix'):
            assert_series_equal(ts.ix[l_slc], expected)

    # (label-based slice, positional slice expected to match it)
    cases = [
        (SLC[Timedelta(hours=7)::-1], SLC[7::-1]),
        (SLC['7 hours'::-1], SLC[7::-1]),

        (SLC[:Timedelta(hours=7):-1], SLC[:6:-1]),
        (SLC[:'7 hours':-1], SLC[:6:-1]),

        (SLC['15 hours':'7 hours':-1], SLC[15:6:-1]),
        (SLC[Timedelta(hours=15):Timedelta(hours=7):-1], SLC[15:6:-1]),
        (SLC['15 hours':Timedelta(hours=7):-1], SLC[15:6:-1]),
        (SLC[Timedelta(hours=15):'7 hours':-1], SLC[15:6:-1]),

        # start before stop with a negative step selects nothing
        (SLC['7 hours':'15 hours':-1], SLC[:0]),
    ]
    for label_slice, positional_slice in cases:
        assert_slices_equivalent(label_slice, positional_slice)
def test_applymap_subset(self):
    """
    ``Styler.applymap`` with a ``subset`` must style exactly the cells that
    ``self.df.loc[subset]`` selects, for a range of IndexSlice shapes.
    """
    def f(x):
        return 'foo: bar'

    slices = [
        pd.IndexSlice[:],
        pd.IndexSlice[:, ['A']],
        pd.IndexSlice[[1], :],
        pd.IndexSlice[[1], ['A']],
        pd.IndexSlice[:2, ['A', 'B']],
    ]

    for slice_ in slices:
        result = self.df.style.applymap(f, subset=slice_)._compute().ctx

        # the expected context is keyed by positional (row, column) pairs
        selected = self.df.loc[slice_]
        expected = {
            (r, c): ['foo: bar']
            for r, row in enumerate(self.df.index)
            for c, col in enumerate(self.df.columns)
            if row in selected.index and col in selected.columns
        }
        self.assertEqual(result, expected)
def _member_to_beacon_proximity(m2badge, beacons):
    """Creates a member-to-beacon proximity DataFrame from member-to-badge
    proximity data.

    The three index levels are renamed to ``datetime``, ``member`` and
    ``beacon``, and only the rows whose ``beacon`` value appears in
    `beacons` are kept, in their original order.  The input frame is not
    modified.

    Parameters
    ----------
    m2badge : pd.DataFrame
        The member-to-badge proximity data, as returned by
        `member_to_badge_proximity`.  Its index must have three levels.

    beacons : list of str
        A list of beacon ids.  Ids that never occur in the data are simply
        ignored; an empty list yields an empty result.

    Returns
    -------
    pd.DataFrame :
        The member-to-beacon proximity data.
    """
    # rename_axis returns a new object, so no explicit copy is needed to
    # protect the caller's frame.  The third level ('observed_id' in the
    # caller's data, presumably) becomes 'beacon'.
    df = m2badge.rename_axis(['datetime', 'member', 'beacon'])

    # A boolean mask works on unsorted indexes and tolerates ids that are
    # absent from the data, unlike a MultiIndex slicer.
    keep = df.index.get_level_values('beacon').isin(beacons)
    return df.loc[keep, :]
def apply(self, func, axis=0, subset=None, **kwargs):
    """
    Queue a function to be applied column-wise, row-wise, or table-wise,
    whose result updates the HTML representation.

    The call is only recorded here: an entry is appended to ``self._todo``
    holding a getter for the ``_apply`` attribute together with the
    positional and keyword arguments to use later.

    .. versionadded:: 0.17.1

    Parameters
    ----------
    func: function
    axis: int, str or None
        apply to each column (``axis=0`` or ``'index'``)
        or to each row (``axis=1`` or ``'columns'``) or
        to the entire DataFrame at once with ``axis=None``.
    subset: IndexSlice
        a valid indexer to limit ``data`` to *before* applying the
        function. Consider using a pandas.IndexSlice
    kwargs: dict
        pass along to ``func``

    Returns
    -------
    self : Styler

    Notes
    -----
    This is similar to ``DataFrame.apply``, except that ``axis=None``
    applies the function to the entire DataFrame at once,
    rather than column-wise or row-wise.
    """
    def resolve(instance):
        # looked up lazily so the attribute is fetched from whichever
        # instance eventually replays the queue
        return getattr(instance, '_apply')

    pending = (resolve, (func, axis, subset), kwargs)
    self._todo.append(pending)
    return self
def _applymap(self, func, subset=None, **kwargs):
    """
    Apply ``func`` to every cell of ``self.data`` selected by ``subset``
    and pass the resulting frame to ``self._update_ctx``.

    ``subset`` defaults to ``pd.IndexSlice[:]`` and is always passed
    through ``_non_reducing_slice`` before indexing.  Returns ``self``.
    """
    # keyword arguments are bound up front so that DataFrame.applymap is
    # handed a plain one-argument callable
    bound = partial(func, **kwargs)

    selector = pd.IndexSlice[:] if subset is None else subset
    selector = _non_reducing_slice(selector)

    styled = self.data.loc[selector].applymap(bound)
    self._update_ctx(styled)
    return self
def background_gradient(self, cmap='PuBu', low=0, high=0, axis=0,
                        subset=None):
    """
    Color the background in a gradient according to the data in each
    column (optionally row). Requires matplotlib.

    The work is queued through ``self.apply`` with
    ``self._background_gradient`` as the function.

    .. versionadded:: 0.17.1

    Parameters
    ----------
    cmap: str or colormap
        matplotlib colormap
    low, high: float
        compress the range by these values.
    axis: int or str
        forwarded unchanged to ``self.apply``
    subset: IndexSlice
        a valid slice for ``data`` to limit the style application to;
        passed through ``_maybe_numeric_slice`` and
        ``_non_reducing_slice`` first

    Returns
    -------
    self : Styler

    Notes
    -----
    Tune ``low`` and ``high`` to keep the text legible by
    not using the entire range of the color map. These extend
    the range of the data by ``low * (x.max() - x.min())``
    and ``high * (x.max() - x.min())`` before normalizing.
    """
    numeric_subset = _maybe_numeric_slice(self.data, subset)
    safe_subset = _non_reducing_slice(numeric_subset)
    self.apply(
        self._background_gradient,
        cmap=cmap,
        subset=safe_subset,
        axis=axis,
        low=low,
        high=high,
    )
    return self
def set_properties(self, subset=None, **kwargs):
    """
    Convenience method for setting one or more non-data dependent
    properties on each cell.

    The keyword arguments are joined into a single
    ``'prop: value;prop: value'`` string, and a function returning that
    constant string is handed to ``self.applymap``.

    .. versionadded:: 0.17.1

    Parameters
    ----------
    subset: IndexSlice
        a valid slice for ``data`` to limit the style application to
    kwargs: dict
        property: value pairs to be set for each cell

    Returns
    -------
    self : Styler
        (whatever ``self.applymap`` returns)

    Examples
    --------
    >>> df = pd.DataFrame(np.random.randn(10, 4))
    >>> df.style.set_properties(color="white", align="right")
    """
    declarations = []
    for prop, value in kwargs.items():
        declarations.append('{p}: {v}'.format(p=prop, v=value))
    values = ';'.join(declarations)

    def constant_style(x):
        # same declaration string for every cell, whatever its value
        return values

    return self.applymap(constant_style, subset=subset)
def test_slice_with_negative_step(self):
    """
    Label slices with a negative step on a month-start DatetimeIndex must
    select the same rows as the equivalent positional slices.

    Every case is checked through plain ``[]`` and ``.loc``; the legacy
    ``.ix`` indexer is checked only on pandas versions that still provide
    it (it was removed in pandas 1.0).
    """
    ts = Series(np.arange(20),
                date_range('2014-01-01', periods=20, freq='MS'))
    SLC = pd.IndexSlice

    def assert_slices_equivalent(l_slc, i_slc):
        expected = ts.iloc[i_slc]
        assert_series_equal(ts[l_slc], expected)
        assert_series_equal(ts.loc[l_slc], expected)
        if hasattr(ts, 'ix'):
            assert_series_equal(ts.ix[l_slc], expected)

    # (label-based slice, positional slice expected to match it)
    cases = [
        (SLC[Timestamp('2014-10-01')::-1], SLC[9::-1]),
        (SLC['2014-10-01'::-1], SLC[9::-1]),

        (SLC[:Timestamp('2014-10-01'):-1], SLC[:8:-1]),
        (SLC[:'2014-10-01':-1], SLC[:8:-1]),

        (SLC['2015-02-01':'2014-10-01':-1], SLC[13:8:-1]),
        (SLC[Timestamp('2015-02-01'):Timestamp('2014-10-01'):-1],
         SLC[13:8:-1]),
        (SLC['2015-02-01':Timestamp('2014-10-01'):-1], SLC[13:8:-1]),
        (SLC[Timestamp('2015-02-01'):'2014-10-01':-1], SLC[13:8:-1]),

        # start before stop with a negative step selects nothing
        (SLC['2014-10-01':'2015-02-01':-1], SLC[:0]),
    ]
    for label_slice, positional_slice in cases:
        assert_slices_equivalent(label_slice, positional_slice)
def test_set_properties_subset(self):
    """
    ``set_properties`` restricted to one cell must put the property in the
    computed context for that cell only.
    """
    frame = pd.DataFrame({'A': [0, 1]})
    styler = frame.style.set_properties(subset=pd.IndexSlice[0, 'A'],
                                        color='white')
    result = styler._compute().ctx
    # context keys are positional (row, column) pairs
    self.assertEqual(result, {(0, 0): ['color: white']})
def test_apply_subset(self):
    """
    ``Styler.apply`` with a ``subset`` must style exactly the cells that
    ``self.df.loc[subset]`` selects, for both axes and a range of
    IndexSlice shapes.  ``self.h`` is the styling function under test and
    receives ``foo='baz'``.
    """
    axes = [0, 1]
    slices = [
        pd.IndexSlice[:],
        pd.IndexSlice[:, ['A']],
        pd.IndexSlice[[1], :],
        pd.IndexSlice[[1], ['A']],
        pd.IndexSlice[:2, ['A', 'B']],
    ]

    for ax in axes:
        for slice_ in slices:
            styler = self.df.style.apply(self.h, axis=ax, subset=slice_,
                                         foo='baz')
            result = styler._compute().ctx

            # the expected context is keyed by positional (row, column) pairs
            selected = self.df.loc[slice_]
            expected = {
                (r, c): ['color: baz']
                for r, row in enumerate(self.df.index)
                for c, col in enumerate(self.df.columns)
                if row in selected.index and col in selected.columns
            }
            self.assertEqual(result, expected)
def test_display_subset(self):
    """
    ``Styler.format`` with a ``subset`` must change the display value of
    the selected cells only; every other cell keeps its raw rendering.

    Body cells are addressed as ``ctx['body'][row][col]`` with the same
    column positions (1 and 2) that the original assertions used.
    """
    df = pd.DataFrame([[.1234, .1234], [1.1234, 1.1234]],
                      columns=['a', 'b'])

    def shown(ctx, row, col):
        return ctx['body'][row][col]['display_value']

    expected = '0.1'
    raw_11 = '1.1234'

    # per-column formatters, first row only
    ctx = df.style.format({"a": "{:0.1f}", "b": "{0:.2%}"},
                          subset=pd.IndexSlice[0, :])._translate()
    self.assertEqual(shown(ctx, 0, 1), expected)
    self.assertEqual(shown(ctx, 1, 1), '1.1234')
    self.assertEqual(shown(ctx, 0, 2), '12.34%')

    # single formatter, first row only (the original repeated this stanza
    # verbatim; it is checked once here)
    ctx = df.style.format("{:0.1f}",
                          subset=pd.IndexSlice[0, :])._translate()
    self.assertEqual(shown(ctx, 0, 1), expected)
    self.assertEqual(shown(ctx, 1, 1), raw_11)

    # single formatter, selected by the label 'a' alone
    ctx = df.style.format("{:0.1f}",
                          subset=pd.IndexSlice['a'])._translate()
    self.assertEqual(shown(ctx, 0, 1), expected)
    self.assertEqual(shown(ctx, 0, 2), '0.1234')

    # single formatter, one cell
    ctx = df.style.format("{:0.1f}",
                          subset=pd.IndexSlice[0, 'a'])._translate()
    self.assertEqual(shown(ctx, 0, 1), expected)
    self.assertEqual(shown(ctx, 1, 1), raw_11)

    # single formatter, list selectors on both axes
    ctx = df.style.format("{:0.1f}",
                          subset=pd.IndexSlice[[0, 1], ['a']])._translate()
    self.assertEqual(shown(ctx, 0, 1), expected)
    self.assertEqual(shown(ctx, 1, 1), '1.1')
    self.assertEqual(shown(ctx, 0, 2), '0.1234')
    self.assertEqual(shown(ctx, 1, 2), '1.1234')
def test_background_gradient(self):
    """
    ``background_gradient`` must colour every cell and respect ``axis``.

    With the frame ``[[1, 2], [2, 4]]`` each column and each row holds one
    minimum and one maximum, so cells of equal rank along the gradient
    direction must receive the same colour.
    """
    df = pd.DataFrame([[1, 2], [2, 4]], columns=['A', 'B'])

    # pairs of (row, col) positions expected to share a colour
    same_when_columnwise = [((0, 0), (0, 1)), ((1, 0), (1, 1))]
    same_when_rowwise = [((0, 0), (1, 0)), ((0, 1), (1, 1))]

    for axis in [0, 1, 'index', 'columns']:
        # axis 0 / 'index' applies the gradient to each column
        if axis in (0, 'index'):
            equal_pairs = same_when_columnwise
        else:
            equal_pairs = same_when_rowwise

        for cmap in [None, 'YlOrRd']:
            # the original looped over `axis` without ever passing it on
            result = df.style.background_gradient(
                cmap=cmap, axis=axis)._compute().ctx
            self.assertTrue(all("#" in x[0] for x in result.values()))
            for first, second in equal_pairs:
                self.assertEqual(result[first], result[second])

    result = (df.style.background_gradient(subset=pd.IndexSlice[1, 'A'])
              ._compute().ctx)
    self.assertEqual(result[(1, 0)], ['background-color: #fff7fb'])
def bundle_slice(json_obj, market_name, expiry_date_str):
    """
    Build a ``pd.IndexSlice`` selector for the assets of one bundle.

    The bundle is looked up with ``config.find_bundle``; the keys of its
    ``config.ASSETS`` entry become the second element of the selector,
    while the first element is a full slice.

    Returns
    -------
    tuple
        ``(slice(None), [asset, ...])`` as produced by ``pd.IndexSlice``.
    """
    bundle = config.find_bundle(json_obj, market_name, expiry_date_str)
    asset_names = [name for name in bundle[config.ASSETS].keys()]
    return pd.IndexSlice[:, asset_names]
def calc_pvalues_wt(self, label):
    """
    Calculate uncorrected pvalue for each variant compared to wild type.

    For every condition found in the stored scores, computes
    ``z = |score_wt - score| / sqrt(SE_wt**2 + SE**2)`` and the two-sided
    raw p-value ``2 * norm.sf(z)``, then writes the table to
    ``/main/<label>/scores_pvalues_wt``.  Does nothing when that table
    already exists, and logs and returns when the store holds no wild type
    row.
    """
    pvalues_key = "/main/{}/scores_pvalues_wt".format(label)
    if self.check_store(pvalues_key):
        return

    idx = pd.IndexSlice
    scores_key = "/main/{}/scores".format(label)

    # NOTE(review): the where-strings name WILD_TYPE_VARIANT; pandas is
    # presumably resolving that name from this scope, so keep these calls
    # in this function.
    wt = self.store.select(scores_key, "index=WILD_TYPE_VARIANT")
    if len(wt) == 0:    # no wild type score
        logging.info("Failed to find wild type score, skipping wild type "
                     "p-value calculations", extra={'oname': self.name})
        return
    data = self.store.select(scores_key, "index!=WILD_TYPE_VARIANT")

    columns = pd.MultiIndex.from_product(
        [sorted(self.child_names()), sorted(["z", "pvalue_raw"])],
        names=["condition", "value"])
    # Start from a float (NaN-filled) frame, as the sibling calculations
    # do; without a fill value the frame would be object-typed.
    result_df = pd.DataFrame(np.nan, index=data.index, columns=columns)

    for cnd in data.columns.levels[0]:
        wt_score = wt.loc[WILD_TYPE_VARIANT, idx[cnd, 'score']]
        wt_se = wt.loc[WILD_TYPE_VARIANT, idx[cnd, 'SE']]

        difference = np.absolute(wt_score - data.loc[:, idx[cnd, 'score']])
        pooled_se = np.sqrt(wt_se ** 2 + data.loc[:, idx[cnd, 'SE']] ** 2)

        result_df.loc[:, idx[cnd, 'z']] = difference / pooled_se
        result_df.loc[:, idx[cnd, 'pvalue_raw']] = \
            2 * stats.norm.sf(result_df.loc[:, idx[cnd, 'z']])

    self.store.put(pvalues_key, result_df, format="table")
def calc_pvalues_pairwise(self, label):
    """
    Calculate pvalues for each variant in each pair of Conditions.

    For every unordered pair of conditions in the stored scores, computes
    ``z = |score_1 - score_2| / sqrt(SE_1**2 + SE_2**2)`` and the two-sided
    raw p-value ``2 * norm.sf(z)``.  The result has three column levels
    (``condition1``, ``condition2``, ``value``) and is written to
    ``/main/<label>/scores_pvalues``.  Does nothing when that table
    already exists.
    """
    pvalues_key = "/main/{}/scores_pvalues".format(label)
    if self.check_store(pvalues_key):
        return

    data = self.store['/main/{}/scores'.format(label)]
    condition_labels = data.columns.levels[0]
    value_names = sorted(["z", "pvalue_raw"])

    # every unordered pair, enumerated once and reused below
    pairs = [(cnd1, cnd2)
             for i, cnd1 in enumerate(condition_labels)
             for cnd2 in condition_labels[i + 1:]]

    # a concrete list (not a zip iterator) so that from_tuples can size it
    # on every supported Python/pandas combination
    column_tuples = [(cnd1, cnd2, value)
                     for cnd1, cnd2 in pairs
                     for value in value_names]
    columns = pd.MultiIndex.from_tuples(
        column_tuples, names=["condition1", "condition2", "value"])

    idx = pd.IndexSlice
    result_df = pd.DataFrame(np.nan, index=data.index, columns=columns)
    for cnd1, cnd2 in pairs:
        difference = np.absolute(data.loc[:, idx[cnd1, 'score']] -
                                 data.loc[:, idx[cnd2, 'score']])
        pooled_se = np.sqrt(data.loc[:, idx[cnd1, 'SE']] ** 2 +
                            data.loc[:, idx[cnd2, 'SE']] ** 2)

        result_df.loc[:, idx[cnd1, cnd2, 'z']] = difference / pooled_se
        result_df.loc[:, idx[cnd1, cnd2, 'pvalue_raw']] = \
            2 * stats.norm.sf(result_df.loc[:, idx[cnd1, cnd2, 'z']])

    self.store.put(pvalues_key, result_df, format="table")
def _as_index_list(values, index, level):
    """Return ``values`` as a list, defaulting to all labels of ``index.levels[level]``."""
    if values is None:
        return index.levels[level].values.tolist()
    if isinstance(values, list):
        return values
    # a lone label (number or string) selects just that one entry
    if isinstance(values, (str, bytes)) or not hasattr(values, '__iter__'):
        return [values]
    return list(values)


def crack_egg(egg, subjects=None, lists=None):
    '''
    Takes an egg and returns a subset of the subjects or lists

    Parameters
    ----------
    egg : Egg data object
        Egg that you want to crack

    subjects : list
        List of subject idxs.  Defaults to every label of the first index
        level of ``egg.pres``.  A single idx or any other iterable is
        accepted as well.

    lists : list
        List of lists idxs.  Defaults to every label of the second index
        level of ``egg.pres``.  A single idx or any other iterable is
        accepted as well.

    Returns
    -------
    new_egg : Egg data object
        A new Egg built from the selected presentation and recall rows
        (and feature rows, when ``egg.features`` is not None)

    '''
    from .egg import Egg

    all_have_features = egg.features is not None
    opts = {}

    subjects = _as_index_list(subjects, egg.pres.index, 0)
    lists = _as_index_list(lists, egg.pres.index, 1)

    idx = pd.IndexSlice
    rows = idx[subjects, lists]

    def per_subject(frame):
        # one nested list per subject, in the order given by `subjects`
        selected = frame.loc[rows, frame.columns]
        return [selected.loc[sub, :].values.tolist() for sub in subjects]

    pres = per_subject(egg.pres)
    rec = per_subject(egg.rec)

    if all_have_features:
        opts['features'] = per_subject(egg.features)

    return Egg(pres=pres, rec=rec, **opts)
def calc_scores(self, label):
    """
    Combine the scores and standard errors within each condition.

    Reads ``/main/<label>/scores_shared`` and writes one ``score``, ``SE``
    and ``epsilon`` column per condition to ``/main/<label>/scores``.
    Does nothing when that table already exists.

    * ``scoring_method == "simple"``: the score is the mean across the
      condition's ``score`` columns; ``SE`` and ``epsilon`` stay NaN.
    * otherwise: a condition with a single replicate copies its score and
      SE with ``epsilon`` 0; several replicates go through
      ``rml_estimator``.
    """
    scores_key = "/main/{}/scores".format(label)
    if self.check_store(scores_key):
        return

    logging.info("Calculating per-condition scores ({})".format(label),
                 extra={'oname': self.name})

    shared_key = "/main/{}/scores_shared".format(label)

    # set up new data frame; only the index of the shared table is needed
    # to shape it
    data = pd.DataFrame(
        np.nan,
        index=self.store.select(shared_key, "columns='index'").index,
        columns=pd.MultiIndex.from_product(
            [sorted(self.child_names()),
             sorted(["score", "SE", "epsilon"])],
            names=["condition", "value"]),
    )

    # set up local variables
    idx = pd.IndexSlice
    score_df = self.store.select(shared_key)

    if self.scoring_method == "simple":
        # special case for simple ratios that have no SE
        # calculates the average score
        for cnd in score_df.columns.levels[0]:
            replicate_scores = score_df.loc[:, idx[cnd, :, 'score']]
            data.loc[:, idx[cnd, 'score']] = replicate_scores.mean(axis=1)
    else:
        for cnd in score_df.columns.levels[0]:
            # replicates along the first axis after transposing
            y = np.array(score_df.loc[:, idx[cnd, :, 'score']].values).T
            sigma2i = np.array(
                score_df.loc[:, idx[cnd, :, 'SE']].values ** 2).T

            if y.shape[0] != 1:
                # multiple replicates
                betaML, var_betaML, eps = rml_estimator(y, sigma2i)
                score, std_err, epsilon = betaML, np.sqrt(var_betaML), eps
            else:
                # single replicate of the condition
                score = y.ravel()
                std_err = np.sqrt(sigma2i).ravel()
                epsilon = 0.

            data.loc[:, idx[cnd, 'score']] = score
            data.loc[:, idx[cnd, 'SE']] = std_err
            data.loc[:, idx[cnd, 'epsilon']] = epsilon

            # special case for normalized wild type variant
            # NOTE(review): nesting of this check could not be read from
            # the collapsed source; kept inside the per-condition loop --
            # confirm against the upstream file.
            if self.logr_method == "wt" and WILD_TYPE_VARIANT in data.index:
                data.loc[WILD_TYPE_VARIANT, idx[:, 'SE']] = 0.
                data.loc[WILD_TYPE_VARIANT, idx[:, 'score']] = 0.
                data.loc[WILD_TYPE_VARIANT, idx[:, 'epsilon']] = 0.

    # store the data
    self.store.put(scores_key, data, format="table")