The following code examples, extracted from open-source Python projects, illustrate how to use pandas.__version__. Note that pandas.__version__ is a string attribute, not a callable.
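Before the project examples, here is a minimal sketch of the pattern most of them share: read the attribute and compare it against a required version. This snippet is illustrative only and is not taken from any of the listed projects; the 0.20.0 threshold is an arbitrary example.

# Illustrative only: print the running pandas version and branch on it.
import pandas as pd
from distutils.version import LooseVersion

print('pandas version: {}'.format(pd.__version__))

# Compare against a minimum version before relying on a newer API.
if LooseVersion(pd.__version__) < LooseVersion('0.20.0'):
    print('running a pandas release older than 0.20.0')
else:
    print('running pandas 0.20.0 or newer')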
def write_legacy_pickles(output_dir):

    # make sure we are < 0.13 compat (in py3)
    try:
        from pandas.compat import zip, cPickle as pickle  # noqa
    except:
        import pickle

    version = pandas.__version__

    print("This script generates a storage file for the current arch, system, "
          "and python version")
    print("  pandas version: {0}".format(version))
    print("  output dir    : {0}".format(output_dir))
    print("  storage format: pickle")

    pth = '{0}.pickle'.format(platform_name())

    fh = open(os.path.join(output_dir, pth), 'wb')
    pickle.dump(create_pickle_data(), fh, pickle.HIGHEST_PROTOCOL)
    fh.close()

    print("created pickle file: %s" % pth)
def test_nan_selection_bug_4858(self):

    # GH 4858; nan selection bug, only works for pytables >= 3.1
    if LooseVersion(tables.__version__) < '3.1.0':
        raise nose.SkipTest('tables version does not support fix for nan '
                            'selection bug: GH 4858')

    with ensure_clean_store(self.path) as store:

        df = DataFrame(dict(cols=range(6), values=range(6)),
                       dtype='float64')
        df['cols'] = (df['cols'] + 10).apply(str)
        df.iloc[0] = np.nan

        expected = DataFrame(dict(cols=['13.0', '14.0', '15.0'],
                                  values=[3., 4., 5.]), index=[3, 4, 5])

        # write w/o the index on that particular column
        store.append('df', df, data_columns=True, index=['cols'])
        result = store.select('df', where='values>2.0')
        assert_frame_equal(result, expected)
def test_legacy_table_write(self):
    raise nose.SkipTest("cannot write legacy tables")

    store = HDFStore(tm.get_data_path(
        'legacy_hdf/legacy_table_%s.h5' % pandas.__version__), 'a')

    df = tm.makeDataFrame()
    wp = tm.makePanel()

    index = MultiIndex(levels=[['foo', 'bar', 'baz', 'qux'],
                               ['one', 'two', 'three']],
                       labels=[[0, 0, 0, 1, 1, 2, 2, 3, 3, 3],
                               [0, 1, 2, 0, 1, 1, 2, 0, 1, 2]],
                       names=['foo', 'bar'])
    df = DataFrame(np.random.randn(10, 3), index=index,
                   columns=['A', 'B', 'C'])
    store.append('mi', df)

    df = DataFrame(dict(A='foo', B='bar'), index=lrange(10))
    store.append('df', df, data_columns=['B'], min_itemsize={'A': 200})
    store.append('wp', wp)

    store.close()
def get_pandas_status():
    try:
        import pandas as pd
        return _check_version(pd.__version__, pandas_min_version)
    except ImportError:
        traceback.print_exc()
        return default_status
def get_sklearn_status():
    try:
        import sklearn as sk
        return _check_version(sk.__version__, sklearn_min_version)
    except ImportError:
        traceback.print_exc()
        return default_status
def get_numpy_status():
    try:
        import numpy as np
        return _check_version(np.__version__, numpy_min_version)
    except ImportError:
        traceback.print_exc()
        return default_status
def get_scipy_status():
    try:
        import scipy as sc
        return _check_version(sc.__version__, scipy_min_version)
    except ImportError:
        traceback.print_exc()
        return default_status
def get_h2o_status():
    try:
        import h2o
        return _check_version(h2o.__version__, h2o_min_version)
    except ImportError:
        traceback.print_exc()
        return default_status
def sortDataFrame(df, column, ascending, inplace):
    # DataFrame.sort was the pre-0.17 API; sort_values replaced it
    if pd.__version__ in ['0.13.0', '0.14.1']:
        new_df = df.sort([column], ascending=[ascending], inplace=inplace)
    else:
        new_df = df.sort_values([column], ascending=[ascending], inplace=inplace)
    return new_df
def print_my_path():
    print('cwd: {}'.format(getcwd()))
    print('__file__:{}'.format(__file__))
    print('abspath: {}'.format(path.abspath(__file__)))
    print('tensorflow: {}'.format(tf.__version__))
    print('pandas: {}'.format(pd.__version__))
def add_date_features_df(col_data, date_col):
    # Pandas nicely tries to prevent you from doing stupid things, like setting values on a copy of a df, not your real one
    # However, it's a bit overzealous in this case, so we'll side-step a bunch of warnings by setting is_copy to false here
    result = {}
    col_data = pd.to_datetime(col_data)

    # astype's error-handling keyword was renamed in pandas 0.20
    if pandas_version < '0.20.0':
        result[date_col + '_day_of_week'] = col_data.apply(lambda x: x.weekday()).astype(int, raise_on_error=False)
    else:
        result[date_col + '_day_of_week'] = col_data.apply(lambda x: x.weekday()).astype(int, errors='ignore')

    try:
        if pandas_version < '0.20.0':
            result[date_col + '_hour'] = col_data.apply(lambda x: x.hour).astype(int, raise_on_error=False)
        else:
            result[date_col + '_hour'] = col_data.apply(lambda x: x.hour).astype(int, errors='ignore')

        result[date_col + '_minutes_into_day'] = col_data.apply(lambda x: x.hour * 60 + x.minute)

        result[date_col + '_hour'] = result[date_col + '_hour'].fillna(0)
        result[date_col + '_minutes_into_day'] = result[date_col + '_minutes_into_day'].fillna(0)
    except AttributeError:
        pass

    result[date_col + '_is_weekend'] = col_data.apply(lambda x: x.weekday() in (5, 6))
    result[date_col + '_day_part'] = result[date_col + '_minutes_into_day'].apply(minutes_into_day_parts)

    result[date_col + '_day_of_week'] = result[date_col + '_day_of_week'].fillna(0)
    result[date_col + '_is_weekend'] = result[date_col + '_is_weekend'].fillna(0)
    result[date_col + '_day_part'] = result[date_col + '_day_part'].fillna(0)

    return result


# Same logic as above, except implemented for a single dictionary, which is much
# faster at prediction time when getting just a single prediction
def platform_name():
    return '_'.join([str(pandas.__version__), str(pl.machine()),
                     str(pl.system().lower()), str(pl.python_version())])
def write_legacy_msgpack(output_dir, compress):

    version = pandas.__version__

    print("This script generates a storage file for the current arch, "
          "system, and python version")
    print("  pandas version: {0}".format(version))
    print("  output dir    : {0}".format(output_dir))
    print("  storage format: msgpack")

    pth = '{0}.msgpack'.format(platform_name())
    to_msgpack(os.path.join(output_dir, pth), create_msgpack_data(),
               compress=compress)

    print("created msgpack file: %s" % pth)
def sanity_check():
    """
    Report the version number of the core packages we use

    :return: Nothing
    """
    import matplotlib

    print('matplotlib: {}'.format(matplotlib.__version__))
    print('numpy: {}'.format(np.__version__))
    print('pandas: {}'.format(pd.__version__))

###############################################################################
def sanity_check():
    import matplotlib

    print('matplotlib: {}'.format(matplotlib.__version__))
    print('numpy: {}'.format(np.__version__))
    print('pandas: {}'.format(pd.__version__))
def check_version(library, min_version):
    """Check minimum library version required

    Parameters
    ----------
    library : str
        The library name to import. Must have a ``__version__`` property.
    min_version : str
        The minimum version string. Anything that matches
        ``'(\\d+ | [a-z]+ | \\.)'``

    Returns
    -------
    ok : bool
        True if the library exists with at least the specified version.
    """
    ok = True
    try:
        library = __import__(library)
    except ImportError:
        ok = False
    else:
        this_version = LooseVersion(library.__version__)
        if this_version < min_version:
            ok = False
    return ok
def serialize(cls, formatted_data, fh):
    # compat: if pandas is old, to_pickle does not accept file handles,
    # so close the handle and pass the file name instead
    if LooseVersion(pd.__version__) <= LooseVersion('0.20.3'):
        fh.close()
        fh = fh.name
    return pd.to_pickle(formatted_data, fh)
def hourly_resample(df, bse=0, minutes=60):
    """
    Args:
        df: pandas dataframe containing time series needing resampling
        bse (int): base time to set; optional; default is zero (on the hour)
        minutes (int): sampling recurrence interval in minutes; optional; default is 60 (hourly samples)

    Returns:
        A Pandas DataFrame that has been resampled to every hour, at the minute defined by the base (bse)

    Description:
        see http://pandas.pydata.org/pandas-docs/dev/generated/pandas.DataFrame.resample.html for more info
        This function uses pandas' powerful time-series manipulation to upsample to every minute,
        then downsample to every hour, on the hour.
        This function will need adjustment if you do not want it to return hourly samples, or if you
        are sampling more frequently than once per minute.
        see http://pandas.pydata.org/pandas-docs/stable/timeseries.html#offset-aliases
    """
    # pandas versioning: before 0.18, resample returned the result directly
    if int(str(pd.__version__).split('.')[0]) == 0 and int(str(pd.__version__).split('.')[1]) < 18:
        df = df.resample('1Min')
    else:
        # you can make this smaller to accommodate a higher sampling frequency
        df = df.resample('1Min').first()

    # http://pandas.pydata.org/pandas-docs/dev/generated/pandas.Series.interpolate.html
    df = df.interpolate(method='time', limit=90)

    # pandas versioning
    if int(str(pd.__version__).split('.')[0]) == 0 and int(str(pd.__version__).split('.')[1]) < 18:
        df = df.resample(str(minutes) + 'Min', closed='left', label='left', base=bse)
    else:
        # modify '60Min' to change the resulting frequency
        df = df.resample(str(minutes) + 'Min', closed='left', label='left', base=bse).first()
    return df
def _get_versions(requirements=True):
    if requirements:
        import matplotlib as mpl
        import xarray as xr
        import pandas as pd
        import numpy as np
        return {'version': __version__,
                'requirements': {'matplotlib': mpl.__version__,
                                 'xarray': xr.__version__,
                                 'pandas': pd.__version__,
                                 'numpy': np.__version__,
                                 'python': ' '.join(sys.version.splitlines())}}
    else:
        return {'version': __version__}
def fit(self, X_df, y=None):
    print('Running basic data cleaning')
    self.vals_to_drop = set(['ignore', 'output', 'regressor', 'classifier'])

    # See if we should fit TfidfVectorizer or not
    for key in X_df.columns:
        if X_df[key].dtype == 'object' and self.column_descriptions.get(key, False) not in ['categorical', 'ignore', 'nlp']:
            # First, make sure that the values in this column are not just ints, or float('nan')
            vals = X_df[key].sample(n=10)
            is_categorical = False
            for val in vals:
                try:
                    if val is not None:
                        float(val)
                except Exception as e:
                    print(e)
                    is_categorical = True

            if is_categorical:
                print('\n')
                print('Encountered a column that is not marked as categorical, but is an "object" pandas type, which typically indicates a categorical column.')
                print('The name of this column is: "{}"'.format(key))
                print('Some example features in this column are: {}'.format(list(X_df[key].sample(n=5))))
                print('If this is a categorical column, please mark it as `{}: "categorical"` as part of your column_descriptions'.format(key))
                print('If this is not a categorical column, please consider converting its dtype before passing data into auto_ml')
                print('\n')
                warnings.warn('Consider marking the "{}" column as categorical'.format(key))
                if self.transformed_column_descriptions.get(key) is None:
                    self.transformed_column_descriptions[key] = 'continuous'

        if key in self.text_columns:
            X_df[key].fillna('nan', inplace=True)
            # astype's error-handling keyword was renamed in pandas 0.20
            if pandas_version < '0.20.0':
                text_col = X_df[key].astype(str, raise_on_error=False)
            else:
                text_col = X_df[key].astype(str, errors='ignore')
            self.text_columns[key].fit(text_col)

            col_names = self.text_columns[key].get_feature_names()

            # Make weird characters play nice, or just ignore them :)
            for idx, word in enumerate(col_names):
                try:
                    col_names[idx] = str(word)
                except:
                    col_names[idx] = 'non_ascii_word_' + str(idx)

            col_names = ['nlp_' + key + '_' + str(word) for word in col_names]

            self.text_columns[key].cleaned_feature_names = col_names

    return self
def _attributes(event_path, number_events, alpha, betas, lambda_, cpu_time,
                wall_time, function, method=None, attrs=None):

    width = max([len(ss) for ss in (event_path, str(number_events), str(alpha),
                                    str(betas), str(lambda_), function, str(method),
                                    socket.gethostname(), getpass.getuser())])
    width = max(19, width)

    def _format(value):
        return '{0: <{width}}'.format(value, width=width)

    if not type(alpha) in (float, int):
        alpha = 'varying'

    new_attrs = {'date': _format(time.strftime("%Y-%m-%d %H:%M:%S")),
                 'event_path': _format(event_path),
                 'number_events': _format(number_events),
                 'alpha': _format(str(alpha)),
                 'betas': _format(str(betas)),
                 'lambda': _format(str(lambda_)),
                 'function': _format(function),
                 'method': _format(str(method)),
                 'cpu_time': _format(str(cpu_time)),
                 'wall_time': _format(str(wall_time)),
                 'hostname': _format(socket.gethostname()),
                 'username': _format(getpass.getuser()),
                 'pyndl': _format(__version__),
                 'numpy': _format(np.__version__),
                 'pandas': _format(pd.__version__),
                 'xarray': _format(xr.__version__),
                 'cython': _format(cython.__version__)}

    if attrs is not None:
        # merge with any pre-existing attributes, keeping both values
        for key in set(attrs.keys()) | set(new_attrs.keys()):
            if key in attrs:
                old_val = attrs[key]
            else:
                old_val = ''
            if key in new_attrs:
                new_val = new_attrs[key]
            else:
                new_val = _format('')
            new_attrs[key] = old_val + ' | ' + new_val

    return new_attrs