The following 43 code examples, extracted from open-source Python projects, illustrate how to use pandas.set_option().
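Before the project examples, a minimal sketch of the option API itself: pd.set_option() changes a display or behaviour option globally, pd.get_option()/pd.reset_option() read and restore it, and pd.option_context() scopes a change to a single block (several of the examples below save and restore options by hand instead). The DataFrame here is an illustrative throwaway, not from any of the projects:

import pandas as pd

df = pd.DataFrame({'a': range(100), 'b': ['x' * 50] * 100})

pd.set_option('display.max_rows', 10)       # change an option globally
print(pd.get_option('display.max_rows'))    # read the current value -> 10
pd.reset_option('display.max_rows')         # restore the default

# Scope a change to one block instead of mutating global state:
with pd.option_context('display.max_colwidth', 20):
    print(df.head())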
def get_analysis(self, instrument):
    """Compute and print performance statistics for the given instrument."""
    # pd.set_option("display.max_rows", len(x))
    ohlc_data = self.feed_list[0].bar.df
    ohlc_data.set_index("date", inplace=True)
    ohlc_data.index = pd.DatetimeIndex(ohlc_data.index)
    dbal = self.fill.balance.df
    start = dbal.index[0]
    end = dbal.index[-1]
    capital = self.fill.initial_cash
    tlog = self.get_tlog(instrument)
    tlog = tlog[tlog["units"] != 0]
    tlog.reset_index(drop=True, inplace=True)
    analysis = stats(ohlc_data, tlog, dbal, start, end, capital)
    print(dict_to_table(analysis))
def df_to_string(df):
    """
    Create a formatted str representation of the DataFrame.

    Parameters
    ----------
    df: DataFrame

    Returns
    -------
    str
    """
    pd.set_option('display.expand_frame_repr', False)
    pd.set_option('precision', 8)
    pd.set_option('display.width', 1000)
    pd.set_option('display.max_colwidth', 1000)
    return df.to_string()
def run(self):
    # pandas.set_option('display.width', 200)
    # Columns: s_code, code, dateline, chg_m, chg, open, close, high, low,
    # last_close, name, ma5, ma89, dif, difma
    self.makeMa()
    # Keep rows for the end date where dif is above difma
    tmp = self.df[(self.df['dateline'] == int(self.setting['end']))
                  & (self.df['dif'] > self.df['difma'])]
    for code in tmp.values:
        if code[13] >= code[14]:
            print("%s,%s,DMA,%s" % (code[0], code[10], code[3]))
    # dif = ma(close, 5) - ma(close, 89)
    # difma = ma(dif, 36)
def ACO(self, df):
    """
    Helper indicator
    :param df:
    :return:
    """
    df_mid_points = (df['High'] + df['Low']) / 2
    mid_points = Data.toFloatArray(df_mid_points)
    longav = tl.SMA(np.array(mid_points), timeperiod=40)
    shortav = tl.SMA(np.array(mid_points), timeperiod=15)
    A0 = longav - shortav
    Mavg = tl.SMA(A0, timeperiod=15)
    AcResult = tl.SMA(Mavg - A0, timeperiod=15)
    signals = np.diff(AcResult)
    return signals

# if __name__ == "__main__":
#     np.set_printoptions(threshold=np.nan)
#     pd.set_option("display.max_rows", 280)
#     dt = Data()
#     df = dt.getCSVData()
#     ACOscillator(df)
def show_domain_stats(log, output, top=50):
    log['Domain'] = log['url'].apply(get_domain)
    by_domain = log.groupby('Domain')
    top_domains = (
        by_domain.count().sort_values('url', ascending=False)['url'].index)
    stats_by_domain = pd.DataFrame(index=top_domains)
    stats_by_domain['Pages'] = by_domain.count()['url']
    stats_by_domain['Total Score'] = by_domain.sum()['score'].astype(int)
    stats_by_domain['Mean Score'] = by_domain.mean()['score']
    stats_by_domain['Max Depth'] = by_domain.max()['depth']
    stats_by_domain['Median Depth'] = by_domain.median()['depth'].astype(int)
    print()
    pages = stats_by_domain['Pages']
    print('Top {} domains stats (covering {:.1%} pages)'
          .format(top, pages[:top].sum() / pages.sum()))
    pd.set_option('display.width', 1000)
    print(stats_by_domain[:top])
    if output:
        filename = '{}-by-domain.csv'.format(output)
        stats_by_domain.to_csv(filename)
        print()
        print('Saved domain stats to {}'.format(filename))
def cleanup(me):
    if hasattr(me, "_pd_display_maxcolwidth"):
        pd.set_option('display.max_colwidth', me._pd_display_maxcolwidth)
    engine, session, handler, patchers = me.engine, me.session, me.handler, me.patchers
    if me.engine:
        if me.session:
            try:
                me.session.rollback()
                me.session.close()
            except:
                pass
        try:
            Base.metadata.drop_all(me.engine)
        except:
            pass
    for patcher in patchers:
        patcher.stop()
    hndls = query_logger.handlers[:]
    handler.close()
    for h in hndls:
        if h is handler:
            query_logger.removeHandler(h)
def convert_tables(self):
    """
    Based on the confidence score, convert the xmap file and the two
    corresponding cmap files into pandas tables.
    """
    pd.set_option('display.width', 200)
    # Strip comment lines, keeping the '#h' header row, for each input file
    for src, suffix in ((self.xmap, 'table'), (self.rcmap, 'rtable'),
                        (self.qcmap, 'qtable')):
        with open('%s.%s' % (self.name, suffix), 'a') as out:
            with open(src) as infile:
                for line in infile:
                    if line.startswith('#h'):
                        header = line[3:]
                        out.write(header)
                    if line[0] != '#':
                        out.write(line)
    self.XmapTable = pd.read_table('%s.table' % self.name)
    headers_x = ['RefContigID', 'RefStartPos', 'RefEndPos', 'QryContigID',
                 'QryStartPos', 'QryEndPos', 'Orientation', 'Confidence',
                 'QryLen', 'RefLen', 'Alignment']
    self.filtered_XmapTable = (
        self.XmapTable[self.XmapTable['Confidence'] >= self.confidence_score]
        [headers_x].reset_index(drop=True))
    headers_r = ['CMapId', 'ContigLength', 'NumSites', 'SiteID', 'Position']
    self.RcmapTable = pd.read_table('%s.rtable' % self.name)[headers_r]
    headers_q = ['CMapId', 'ContigLength', 'NumSites', 'SiteID', 'Position', 'Coverage']
    self.QcmapTable = pd.read_table('%s.qtable' % self.name)[headers_q]
    os.remove('%s.table' % self.name)
    os.remove('%s.rtable' % self.name)
    os.remove('%s.qtable' % self.name)
def get_results(filename, seed_image_id):
    pd.set_option('display.max_rows', 10000)
    start_time = time.time()
    df = pd.read_csv(filename)
    # Columns: temp_key, key, ground_truth, prediction, result
    # e.g.     00000,    00000, 43,         1095,       0.3076
    if len(df.index) == 0:
        # Wow, it could not find anything. The image must be mud...
        return []
    del df['temp_key']
    df.prediction = df.ground_truth - (df.prediction - 1000)
    df_plus = df[df.prediction >= 0]
    df_neg = df[df.prediction < 0]
    df_neg.prediction += 360
    df = pd.concat([df_plus, df_neg])
    del df['ground_truth']
    df = df.groupby(['key', 'prediction']).result.sum().reset_index()
    filtered_results = []
    for image_id, image_results in df.groupby(['key']):
        top_result_index = image_results['result'].idxmax()
        # .ix is deprecated; .loc does the same label-based lookup here
        angle = image_results.loc[top_result_index]['prediction']
        max_value = image_results.loc[top_result_index]['result']
        filtered_results.append([seed_image_id, image_id, int(angle), max_value])
    print('Done reading results, with slow python, for seed image ID:'
          + str(seed_image_id) + ' in %s seconds' % (time.time() - start_time))
    return filtered_results
def _gen_summary(self, col_width=50):
    pd.set_option('display.max_colwidth', -1)
    song_name = '<p style="color:{0};">{1}</p>'.format(attr_color[self.live.attr],
                                                       self.live.name)
    df_head = pd.DataFrame({'Song Name': [song_name]})
    df_head['Difficulty'] = self.live.difficulty
    df_head['Score'] = int(self.global_status['cum_score'])
    df_head['Cover Rate'] = '{0:.2f}%'.format(
        100 * (self.simul_result['timing_sec'] <= self.simul_result['judge_end_time']).mean())
    df_head['Max Combo'] = self.simul_result['combo'].max()
    for accr in accuracy_list:
        df_head[accr] = self.global_status['note_stat'][accr]

    card = ['<img src="{0}" width={1} />'.format(icon_path(card.card_id, card.idolized), col_width)
            for card in self.card_list]
    summary, keys = [], ['base_score', 'score', 'hp', 'judge', 'weak_judge']
    for i in range(len(card)):
        temp = {k: getattr(self.skill_tracker[i], 'cum_' + k) for k in keys}
        temp['card'] = card[i]
        summary.append(temp)
    df = pd.DataFrame(summary, columns=['card'] + keys)
    df = df.append(pd.DataFrame(df.sum()).transpose())
    df['base_score'] = df['base_score'].apply(lambda x: '<p>{0}</p>'.format(int(x)))
    df['score'] = df['score'].apply(lambda x: '<p>{0}</p>'.format(int(x)))
    df['hp'] = df['hp'].apply(lambda x: '<p>{0}</p>'.format(int(x)))
    df['judge'] = df['judge'].apply(lambda x: '<p>{0}</p>'.format(round(x, 1)))
    df['weak_judge'] = df['weak_judge'].apply(lambda x: '<p>{0}</p>'.format(round(x, 1)))
    df.index = ['<p>{0}</p>'.format(x)
                for x in ['L1', 'L2', 'L3', 'L4', 'C', 'R4', 'R3', 'R2', 'R1', 'Total']]
    df.loc['<p>Total</p>', 'card'] = ''
    html_code = df_head.to_html(escape=False, index=False) + df.transpose().to_html(escape=False)
    return HTML(html_code)
def get_turbine_types(print_out=True, **kwargs):
    r"""
    Get the names of all possible wind turbine types for which the power
    coefficient curve or power curve is provided in the data files in
    the directory windpowerlib/data.

    Parameters
    ----------
    print_out : boolean
        Directly prints the list of types if set to True. Default: True.

    Examples
    --------
    >>> from windpowerlib import wind_turbine
    >>> turbines = wind_turbine.get_turbine_types(print_out=False)
    >>> print(turbines[turbines["turbine_id"].str.contains("ENERCON")].iloc[0])
    turbine_id    ENERCON E 101 3000
    p_nom                    3000000
    Name: 25, dtype: object

    """
    df = read_turbine_data(**kwargs)
    if print_out:
        pd.set_option('display.max_rows', len(df))
        print(df[['turbine_id', 'p_nom']])
        pd.reset_option('display.max_rows')
    return df[['turbine_id', 'p_nom']]
def show_heat_map(self):
    pd.set_option('precision', 2)
    plt.figure(figsize=(20, 6))
    sns.heatmap(self.data.corr(), square=True)
    plt.xticks(rotation=90)
    plt.yticks(rotation=360)
    plt.suptitle("Correlation Heatmap")
    plt.show()
def show_heat_map_to(self, target='sentiment'):
    correlations = self.data.corr()[target].sort_values(ascending=False)
    plt.figure(figsize=(40, 6))
    correlations.drop(target).plot.bar()
    pd.set_option('precision', 2)
    plt.xticks(rotation=90, fontsize=7)
    plt.yticks(rotation=360)
    plt.suptitle('The Heatmap of Correlation With ' + target)
    plt.show()
def print_table(table, name=None, fmt=None):
    """
    Pretty print a pandas DataFrame.

    Uses HTML output if running inside Jupyter Notebook, otherwise
    formatted text output.

    Parameters
    ----------
    table : pandas.Series or pandas.DataFrame
        Table to pretty-print.
    name : str, optional
        Table name to display in upper left corner.
    fmt : str, optional
        Formatter to use for displaying table elements.
        E.g. '{0:.2f}%' for displaying 100 as '100.00%'.
        Restores original setting after displaying.
    """
    if isinstance(table, pd.Series):
        table = pd.DataFrame(table)

    if fmt is not None:
        prev_option = pd.get_option('display.float_format')
        pd.set_option('display.float_format', lambda x: fmt.format(x))

    if name is not None:
        table.columns.name = name

    display(table)

    if fmt is not None:
        pd.set_option('display.float_format', prev_option)
def get_pretty_stats(stats, recorded_cols=None, num_rows=10):
    """
    Format and print the last few rows of a statistics DataFrame.
    See the pyfolio project for the data structure.

    Parameters
    ----------
    stats: list[Object]
        An array of statistics for the period.
    num_rows: int
        The number of rows to display on the screen.

    Returns
    -------
    str
    """
    if isinstance(stats, pd.DataFrame):
        stats = stats.T.to_dict().values()

    df, columns = prepare_stats(stats, recorded_cols=recorded_cols)

    pd.set_option('display.expand_frame_repr', False)
    pd.set_option('precision', 8)
    pd.set_option('display.width', 1000)
    pd.set_option('display.max_colwidth', 1000)

    formatters = {
        'returns': lambda returns: "{0:.4f}".format(returns),
    }
    return df.tail(num_rows).to_string(
        columns=columns,
        formatters=formatters
    )
def run(self):
    sql_data = ("select * FROM s_stock_runtime "
                "WHERE dateline =20160607 and s_code='sh600774' ")
    tmpdf = pandas.read_sql(sql_data, self.mysql.db)
    pandas.set_option('display.width', 400)
    res = {}
    for i in range(len(tmpdf)):
        item = tmpdf.iloc[i]
        if item.s_code not in res.keys():
            res[item.s_code] = {'B': 0, 'S': 0}
        # Count bid (B) and ask (S) levels 1-5 with volume above 100,000
        for level in range(1, 6):
            if getattr(item, 'B_%d_volume' % level) > 100000:
                res[item.s_code]['B'] += 1
            if getattr(item, 'S_%d_volume' % level) > 100000:
                res[item.s_code]['S'] += 1
    print(res)
def make_state_page(df, conn, keyname='CA', bucketname='www.jobs.com'):
    '''Ingest a table and publish it to an s3 website bucket.'''
    # Avoid truncating cell contents when rendering the entire dataframe
    pd.set_option('display.max_colwidth', -1)
    website_bucket = conn.get_bucket(bucketname)
    html = df.to_html(
        formatters=dict(title=markdown),
        escape=False,
        index=True
    ) + " postings last updated " + str(datetime.now().strftime("%Y-%m-%d %H:%M"))
    html = ('<!DOCTYPE html><HTML><head><link rel="stylesheet" '
            'href="http://s3.amazonaws.com/www.jobs.com/style.css">'
            '</head><body>{}</body></HTML>').format(html.encode('utf8'))
    send_to_s3(keyname=keyname, bucket=website_bucket, html=html)
    return None
def test_headers(measure_type="Scoring"): import pandas as pd pd.set_option('display.max_columns', None) nba_player = NBA_player("203382", "Baynes, Aron", "Aron Baynes") nba_player.getPlayerStats(measure_type=measure_type) df = pd.DataFrame(columns = nba_player.header) df.loc[0] = nba_player.getPlayerStats(measure_type=measure_type)[0][1] print(df) return nba_player
def set_pandas_print_options():
    # w, h = pd.util.terminal.get_terminal_size()

    # set output options for regression tests on a wide terminal
    pd.set_option('display.width', 100)
    # reduce precision to avoid too-sensitive tests because of rounding:
    pd.set_option('display.precision', 6)
def test_format_sparse_config(self):
    warn_filters = warnings.filters
    warnings.filterwarnings('ignore', category=FutureWarning,
                            module=".*format")
    # GH1538
    pd.set_option('display.multi_sparse', False)

    result = self.index.format()
    self.assertEqual(result[1], 'foo  two')

    self.reset_display_options()

    warnings.filters = warn_filters
def test_underlying_data_conversion(self):
    # GH 4080
    df = DataFrame(dict((c, [1, 2, 3]) for c in ['a', 'b', 'c']))
    df.set_index(['a', 'b', 'c'], inplace=True)
    s = Series([1], index=[(2, 2, 2)])
    df['val'] = 0
    df
    df['val'].update(s)

    expected = DataFrame(
        dict(a=[1, 2, 3], b=[1, 2, 3], c=[1, 2, 3], val=[0, 1, 0]))
    expected.set_index(['a', 'b', 'c'], inplace=True)
    tm.assert_frame_equal(df, expected)

    # GH 3970
    # these are chained assignments as well
    pd.set_option('chained_assignment', None)
    df = DataFrame({"aa": range(5), "bb": [2.2] * 5})
    df["cc"] = 0.0

    ck = [True] * len(df)

    df["bb"].iloc[0] = .13

    # TODO: unused
    df_tmp = df.iloc[ck]  # noqa

    df["bb"].iloc[0] = .15
    self.assertEqual(df['bb'].iloc[0], 0.15)
    pd.set_option('chained_assignment', 'raise')

    # GH 3217
    df = DataFrame(dict(a=[1, 3], b=[np.nan, 2]))
    df['c'] = np.nan
    df['c'].update(pd.Series(['foo'], index=[0]))

    expected = DataFrame(dict(a=[1, 3], b=[np.nan, 2], c=['foo', np.nan]))
    tm.assert_frame_equal(df, expected)
def test_api_default_format(self):
    # default_format option
    with ensure_clean_store(self.path) as store:
        df = tm.makeDataFrame()

        pandas.set_option('io.hdf.default_format', 'fixed')
        _maybe_remove(store, 'df')
        store.put('df', df)
        self.assertFalse(store.get_storer('df').is_table)
        self.assertRaises(ValueError, store.append, 'df2', df)

        pandas.set_option('io.hdf.default_format', 'table')
        _maybe_remove(store, 'df')
        store.put('df', df)
        self.assertTrue(store.get_storer('df').is_table)
        _maybe_remove(store, 'df2')
        store.append('df2', df)
        self.assertTrue(store.get_storer('df').is_table)

        pandas.set_option('io.hdf.default_format', None)

    with ensure_clean_path(self.path) as path:
        df = tm.makeDataFrame()

        pandas.set_option('io.hdf.default_format', 'fixed')
        df.to_hdf(path, 'df')
        with get_store(path) as store:
            self.assertFalse(store.get_storer('df').is_table)
        self.assertRaises(ValueError, df.to_hdf, path, 'df2', append=True)

        pandas.set_option('io.hdf.default_format', 'table')
        df.to_hdf(path, 'df3')
        with HDFStore(path) as store:
            self.assertTrue(store.get_storer('df3').is_table)
        df.to_hdf(path, 'df4', append=True)
        with HDFStore(path) as store:
            self.assertTrue(store.get_storer('df4').is_table)

        pandas.set_option('io.hdf.default_format', None)
def setUpClass(cls):
    pd.set_option('chained_assignment', 'raise')
def __show_results_in_table(self):
    """Show results in pandas DataFrame format."""
    df = super(ViewPresenter, self)._prepare_dataframe()
    if df is not None:
        # Better visualization in command line
        pd.set_option('expand_frame_repr', False)
        pd.set_option('display.max_columns', 999)
        display(df)
def print_full(x):
    """Print all rows in Pandas DataFrame x."""
    pd.set_option('display.max_rows', len(x))
    print(x)
    pd.reset_option('display.max_rows')
def update_html(df, metadb_timestamp):
    pd.set_option('display.max_colwidth', -1)
    table_html = df.to_html(formatters={"doi": format_doi, "gse": format_gse},
                            escape=False, index=False, justify="left",
                            classes="table table-striped table-bordered")
    html_template_str = unicode(open("output_template.html").read())
    n_overdue = df.shape[0]
    final_html = html_template_str.format(date_updated=datetime.date.today(),
                                          metageo_timestamp=metadb_timestamp,
                                          n_overdue=n_overdue,
                                          table_html=table_html,
                                          tracking_script=tracking_script)
    with open("docs/index.html", "w") as f:
        f.write(final_html.encode("utf-8"))
def update_html(df, sradb_timestamp):
    pd.set_option('display.max_colwidth', -1)
    table_html = df.to_html(formatters={"doi": format_doi, "srx": format_srx},
                            escape=False, index=False, justify="left",
                            classes="table table-striped table-bordered")
    html_template_str = unicode(open("sra_template.html").read())
    n_overdue = df.shape[0]
    final_html = html_template_str.format(date_updated=datetime.date.today(),
                                          sradb_timestamp=sradb_timestamp,
                                          n_overdue=n_overdue,
                                          table_html=table_html,
                                          tracking_script=tracking_script)
    with open("docs/sra.html", "w") as f:
        f.write(final_html.encode("utf-8"))
def format_pandas(opts=pandas_options):
    try:
        import pandas as pd
        for key, val in opts.items():
            pd.set_option(key, val)
    except ImportError:
        return
def print_full(df):
    '''Print all rows of a pd.DataFrame.'''
    pd.set_option('display.max_rows', len(df))
    print('\n')
    print(df)
    pd.reset_option('display.max_rows')


# TODO:
def printSeries(series, label, header='', asStr=False):
    """
    Print a `series` of values, with a given `label`.

    :param series: (convertible to pandas Series) the values
    :param label: (str) a label to print for the data
    :return: none
    """
    import pandas as pd

    if type(series) == pd.DataFrame:
        df = series
        df = df.T
    else:
        df = pd.DataFrame(pd.Series(series))  # DF is more convenient for printing

    df.columns = [label]

    oldPrecision = pd.get_option('precision')
    pd.set_option('precision', 5)
    s = "%s\n%s" % (header, df.T)
    pd.set_option('precision', oldPrecision)

    if asStr:
        return s
    else:
        print(s)
def _predict(args, cell):
    schema, features = _local_predict.get_model_schema_and_features(args['model'])
    headers = [x['name'] for x in schema]
    img_cols = []
    for k, v in six.iteritems(features):
        if v['transform'] in ['image_to_vec']:
            img_cols.append(v['source_column'])

    data = args['data']
    df = _local_predict.get_prediction_results(
        args['model'], data, headers, img_cols=img_cols, cloud=False,
        show_image=not args['no_show_image'])

    def _show_img(img_bytes):
        return '<img src="data:image/png;base64,' + img_bytes + '" />'

    def _truncate_text(text):
        return ((text[:37] + '...')
                if isinstance(text, six.string_types) and len(text) > 40 else text)

    # Truncate text explicitly here because we will set display.max_colwidth to -1.
    # This applies to images too, but image columns are overridden by "_show_img()" later.
    formatters = {x: _truncate_text for x in df.columns if df[x].dtype == np.object}
    if not args['no_show_image'] and img_cols:
        formatters.update({x + '_image': _show_img for x in img_cols})

    # Set display.max_colwidth to -1 so we can display images.
    old_width = pd.get_option('display.max_colwidth')
    pd.set_option('display.max_colwidth', -1)
    try:
        IPython.display.display(IPython.display.HTML(
            df.to_html(formatters=formatters, escape=False, index=False)))
    finally:
        pd.set_option('display.max_colwidth', old_width)
def exportGenre(df, genre):
    print("Now exporting ", genre)
    pd.set_option('display.width', 120)
    df_genre = df[df['genre'] == genre]
    print(df_genre.shape)
    # .ix is deprecated; .loc does the same label-based selection here
    df_sample = df_genre.loc[np.random.choice(df_genre.index, 10000, replace=False)]
    print(df_sample.shape)
    with open("lyrics/" + genre + ".txt", "a") as f:
        for index, row in df_sample.iterrows():
            f.write("<S>\n" + row['lyrics'] + "\n<E>\n")
def print_full(x):
    pd.set_option('display.max_rows', len(x))
    print(x)
    pd.reset_option('display.max_rows')
def result():
    pd.set_option('display.max_rows', 1000)
    profit_to_loss()
    # best_rand_comb()
def show_data(x):
    df = read_list("Tick/" + get_time(),
                   ['last_price', 'volume', 'bp1', 'bo1', 'bq1', 'ap1', 'ao1', 'aq1',
                    'bp2', 'bo2', 'bq2', 'ap2', 'ao2', 'aq2',
                    'instrument_token', 'timestamp'])
    df = df.query('instrument_token == ' + str(x))
    pd.set_option('display.max_rows', len(df))
    print(df)
def download_data(quote, day=0):
    days = day + 1
    url1 = 'http://www.google.com/finance/getprices?q='
    url2 = '&x=NSE&i=60&p='
    url3 = 'd&f=d,c,o,h,l,v&df=cpct&auto=1&ts=1266701290218'
    # Not using the ts=1266701290218 parameter; if something goes wrong, do try it
    df = pd.read_csv(url1 + quote + url2 + str(days) + url3, header=4,
                     parse_dates=True, skiprows=[5, 6, 7])
    pd.set_option('display.max_rows', 100)
    if days > 1:
        i = 0
        for i in range(2, len(df)):
            # Readings for the next day start at a row prefixed with 'a'
            if str(df.iat[i, 0]).startswith('a'):
                df.iat[i, 0] = df.iat[i, 0][1:]
                try:
                    if int(df.iat[i, 0]) - int(df.iat[i - 2, 0]) < 0:
                        break
                except:
                    print(df)
                    continue
        # i = df.index.get_loc('a', method='ffill')
        df = df.ix[0:i - 2]
    df.columns = ['DATE', 'CLOSE', 'HIGH', 'LOW', 'OPEN', 'VOLUME']
    df = df.set_index('DATE')
    return df
def main():
    """Run Eskapade

    Top-level control function for an Eskapade run started from the
    command line. Arguments specified by the user are parsed and
    converted to settings in the configuration object. Optionally, an
    interactive IPython session is started when the run is finished.
    """
    # create parser for command-line arguments
    parser = create_arg_parser()
    user_args = parser.parse_args()

    # create config object for settings
    if not user_args.unpickle_config:
        # create new config
        settings = ConfigObject()
    else:
        # read previously persisted settings if pickled file is specified
        conf_path = user_args.config_files.pop(0)
        settings = ConfigObject.import_from_file(conf_path)
    del user_args.unpickle_config

    # set configuration macros
    settings.add_macros(user_args.config_files)

    # set user options
    settings.set_user_opts(user_args)

    # run Eskapade
    core.execution.run_eskapade(settings)

    # start interpreter if requested (--interactive on command line)
    if settings.get('interactive'):
        # create process manager, config object, and data store
        proc_mgr = ProcessManager()
        settings = proc_mgr.service(ConfigObject)
        ds = proc_mgr.service(DataStore)

        # set Pandas display options
        pd.set_option('display.width', 120)
        pd.set_option('display.max_columns', 50)

        # start interactive session
        log = logging.getLogger(__name__)
        log.info("Continuing interactive session ... press Ctrl+d to exit.\n")
        IPython.embed()
def main():
    desired_width = 600
    pd.set_option('display.width', desired_width)

    # specify sentence/true headline/predicted headline paths
    sentence_path = './dataset/test_enc.txt'
    true_headline_path = "./dataset/test_dec.txt"
    predicted_headline_path = "./output/predicted_test_headline.txt"

    # specify number of lines to read
    number_of_lines_read = 400

    with open(true_headline_path) as ft:
        print("reading actual headlines...")
        true_headline = [next(ft).strip() for line in range(number_of_lines_read)]

    with open(predicted_headline_path) as fp:
        print("reading predicted headlines...")
        predicted_headline = []
        for line in range(number_of_lines_read):
            predicted_headline.append(next(fp).strip())

    # for debugging: detect empty predicted headlines (an empty predicted
    # headline causes an error while calculating BLEU)
    # print(predicted_headline[88380])
    # print(true_headline[88380])

    with open(sentence_path) as f:
        print("reading sentences...")
        sentence = [next(f).strip() for line in range(number_of_lines_read)]

    # For testing purposes:
    # true_headline = ["F1's Schumacher Slams Into Wall"]
    # predicted_headline = ["Schumacher Crashes in Practice"]

    BLEUscore, avgBLEUscore = getBLEUscore(true_headline, predicted_headline)
    print("average BLEU score: %f" % avgBLEUscore)

    summary = list(zip(BLEUscore, predicted_headline, true_headline, sentence))
    # pd.set_option("display.max_rows", 999)
    # pd.set_option('max_colwidth', 80)
    df = pd.DataFrame(data=summary,
                      columns=['BLEU score', 'Predicted headline', 'True headline', 'article'])
    df_sortBLEU = df.sort_values('BLEU score', ascending=False)

    # Store the top 100 predicted headlines in terms of BLEU score
    output_file = 'BLEU.txt'
    df_sortBLEU.head(100).to_csv(
        output_file, sep='\n', index=False,
        line_terminator='\n-------------------------------------------------\n')
    print("Finished creating results summary in %s!" % output_file)
def create_time_table(df, df_tsel, htmlname, col, vd, cfg):
    # Prepare df and df_tsel
    grouped = df.groupby(col)
    # accurate start and end times
    accstarts = []
    accends = []
    photos = []
    # insert photos into df_tsel
    for index, row in df_tsel.iterrows():
        person = row['person']
        i = person.split('_')[1]
        start = int(row['start'])
        end = int(row['end'])
        group = grouped.get_group(int(i))
        accstart = group.query('abs(time-%f)<=30' % start).time.min()
        accend = group.query('abs(time-%f)<=30' % end).time.max()
        accstarts.append(accstart)
        accends.append(accend)
        num_photo = group.query('abs(time-%f)<=30' % start).number.tolist()[1]
        photo = ('<img alt="not found" src="%s/%d.png" class="imgshow" '
                 'onclick="goto(%d)"/>' % (vd.photo_dir, num_photo, accstart))
        photos.append(photo)
    df_tsel['photo'] = photos
    df_tsel['accstarts'] = format_time(np.array(accstarts))
    df_tsel['accends'] = format_time(np.array(accends))
    df_tsel = df_tsel[['person', 'accstarts', 'accends', 'photo']]
    df_tsel = df_tsel.sort_values('accstarts')
    df_tsel.columns = ['person', 'start', 'end', 'photo']

    # Make an html file
    header = '<!DOCTYPE html> \n <html> \n <head> \n'
    css = ('<link rel="stylesheet" href="styles.css"> '
           '<link rel="stylesheet" href="table.css"> \n')
    js = '<script src="/Users/chiachun/Exp/tagly4/demo/pvideo.js"> </script> \n'
    header2 = '</head> \n <body> '
    lvideo1 = (' <div style="float:left;margin-right:15px;"> '
               '<video id="Video1" height="400" controls> ')
    lvideo2 = '<source src="%s" type="video/mp4"> </video> </div> \n' % cfg.videoName
    div1 = '<div style="overflow-x:auto;">\n'
    div2 = '</div> </body> </html>'
    pd.set_option('display.max_colwidth', -1)
    f = open(htmlname, 'w')
    f.write(header)
    f.write(css)
    f.write(js)
    f.write(header2)
    f.write(lvideo1)
    f.write(lvideo2)
    f.write(div1)
    f.write(df_tsel.to_html(escape=False, index=False))
    f.write(div2)
    f.close()
def run(self):
    print(self.args)
    day = self.args[2]
    pandas.set_option('display.width', 200)
    d2 = self.mysql.getRecord(
        "select s_code from s_stock_list where dateline=%s" % day)
    for row in d2:
        s_code = row['s_code']
        self._chQ = self.getChuQuan(s_code)
        sql_data = ("select s_code,code,dateline,chg_m,chg,open,close,high,low,"
                    "last_close,name FROM s_stock_trade "
                    "WHERE s_code ='%s' and dateline >20150101 " % s_code)
        print(sql_data)
        tmpdf2 = pandas.read_sql(sql_data, self.mysql.db)
        tmpdf = tmpdf2.apply(self.format_chuquan_hanlder, axis=1)
        tmpdf.sort_values(by=('dateline'), ascending=False)
        ma_list = [5, 10, 20, 30, 60]
        for ma in ma_list:
            tmpdf['MA_' + str(ma)] = pandas.rolling_mean(tmpdf['close'], ma)
        last5 = tmpdf.tail(60)
        for i5 in range(0, len(last5)):
            if str(last5.iloc[i5].dateline) != day:
                continue
            word = s_code[2:] + str(last5.iloc[i5].dateline)
            if math.isnan(last5.iloc[i5].MA_5):
                break
            if math.isnan(last5.iloc[i5].MA_10):
                break
            _m60 = last5.iloc[i5].MA_60
            _m60 = 0 if math.isnan(_m60) else round(_m60, 2)
            _m30 = last5.iloc[i5].MA_30
            _m30 = 0 if math.isnan(_m30) else round(_m30, 2)
            item = {}
            item['s_code'] = s_code
            item['dateline'] = last5.iloc[i5].dateline
            item['hash'] = hashlib.md5(word).hexdigest()
            item['ma5'] = round(last5.iloc[i5].MA_5, 2)
            item['ma10'] = round(last5.iloc[i5].MA_10, 2)
            item['ma20'] = round(last5.iloc[i5].MA_20, 2)
            item['ma30'] = _m30
            item['ma60'] = _m60
            self.mysql.dbInsert('s_stock_average', item)
def min_data(self):
    sql_data = ("select * FROM s_stock_runtime "
                "WHERE dateline =20160607 and s_code='sz000048' ")
    tmpdf = pandas.read_sql(sql_data, self.mysql.db)
    pandas.set_option('display.width', 400)
    # Resample period type: 'W' for week, 'M' for month, 'Q' for quarter,
    # '5min' for 5 minutes, '12D' for 12 days
    period_type = 'W'
    # Use the date column as the index
    tmpdf.set_index('date_str', inplace=True)
    period_stock_data = tmpdf.resample('1Min', how='last')
    period_stock_data['MA_1'] = pandas.rolling_mean(period_stock_data['B_1_price'], 1)
    print(period_stock_data)
    sys.exit()

    df = pandas.DataFrame(columns=('k', 'v'))
    data = {}
    j = 0
    for i in range(len(tmpdf)):
        _min = tmpdf.iloc[i].min_sec
        if _min > 150000 and '150000' in data.keys():
            continue
        # Truncate seconds and bucket by minute, e.g. 93015 -> '93000'
        _min = str(_min)[0:-2]
        _min_str = "%s00" % _min
        if _min_str not in data.keys():
            j += 1
            data[_min_str] = {'v': tmpdf.iloc[i].B_1_price}
            df.loc[j] = {'k': _min_str, 'v': tmpdf.iloc[i].B_1_price}
    print(df)
def init(self, setting):
    # self.mysql = sMysql(MYSQL_DB['host'], MYSQL_DB['user'], MYSQL_DB['password'], MYSQL_DB['dbname'])
    limit = 100
    if 'limit' in setting.keys():
        limit = setting['limit']
    _where = []
    s_keys_list = setting.keys()
    if 'start' not in s_keys_list and 'end' not in s_keys_list:
        print(u"StartTime OR EndTime is Error")
        sys.exit()
    _today = self.tools.d_date('%Y%m%d')
    if 'end' not in setting.keys():
        setting['end'] = _today
    if 'start' not in setting.keys():
        setting['start'] = setting['end']
    if setting['start'] == setting['end']:
        _where.append(" dateline = %s" % setting['end'])
    else:
        _where.append(" dateline <= %s" % setting['end'])
        _where.append(" dateline >= %s" % setting['start'])
    if 'universe' in setting.keys():
        s_codes = " s_code in(%s)" % self.___set_universe(setting['universe'])
        _where.append(s_codes)
    _wheres = ' AND '.join(_where)
    print(u"======= end date === %s ====" % setting['end'])
    date_sql = ("select dateline FROM s_opening_day WHERE dateline <=%s "
                "order by dateline desc limit %s" % (setting['end'], limit))
    print(date_sql)
    temp = self.mysql.getRecord(date_sql)
    self.today = _today
    self.lastDay = temp[0]['dateline']
    self.yestoday = temp[1]['dateline']
    pandas.set_option('display.width', 200)
    sql_data = ("select s_code,code,dateline,chg_m,chg,open,close,high,low,"
                "last_close,name,amount,run_market FROM s_stock_trade "
                "WHERE %s " % _wheres)
    tmpdf = pandas.read_sql(sql_data, self.mysql.db)
    # Apply the ex-rights (chuquan) price adjustment if requested
    if ('is_open_chuquan' in setting.keys()) and setting['is_open_chuquan']:
        self._chQ = self.getChuQuan()
        self.df = tmpdf.apply(self.format_chuquan_hanlder, axis=1)
    else:
        self.df = tmpdf
    self.todayDF = self.df[self.df.dateline == int(self.lastDay)]
    self.yestodayDF = self.df[self.df.dateline == int(self.yestoday)]
    print("========init Days & init stock trader Done.")
def main():
    # Build a dict of stock names to codes and fetch daily history for each
    # via tushare; code is the stock code, start/end are the date range
    stock_list = {"zsyh": "600036", "jsyh": "601939", "szzs": "000001",
                  "pfyh": "600000", "msyh": "600061"}
    for stock, code in stock_list.items():
        globals()[stock] = tsh.get_hist_data(code, start="2015-01-01", end="2016-04-16")

    # Inspect the downloaded data
    make_end_line()
    print(zsyh.head())
    make_end_line()
    print(zsyh.columns)
    make_end_line()
    """
    Columns returned by tushare:
    date: date
    open: opening price
    high: daily high
    close: closing price
    low: daily low
    volume: trade volume
    price_change: price change
    p_change: percent change
    ma5: 5-day moving average price
    ma10: 10-day moving average price
    ma20: 20-day moving average price
    v_ma5: 5-day average volume
    v_ma10: 10-day average volume
    v_ma20: 20-day average volume
    turnover: turnover rate [not available for indices]
    """
    print(zsyh.describe())
    make_end_line()
    print(zsyh.info())
    make_end_line()

    # Plot closing price and volume
    plt.show(zsyh["close"].plot(figsize=(12, 8)))
    # pd.set_option("display.float_format", lambda x: "%10.3f" % x)
    plt.show(zsyh["volume"].plot(figsize=(12, 8)))
    zsyh[["close", "ma5", "ma10", "ma20"]].plot(subplots=True)
    plt.show()
    plt.show(zsyh[["close", "ma5", "ma10", "ma20"]].plot(figsize=(12, 8), linewidth=2))
    plt.show(zsyh["p_change"].plot())
    plt.show(zsyh["p_change"].plot(figsize=(10, 4), legend=True, linestyle="--", marker="o"))
    # Histogram and kernel density estimate (KDE) of daily percent changes
    plt.show(zsyh["p_change"].hist(bins=20))
    plt.show(zsyh["p_change"].plot.kde())
    plt.show(sns.kdeplot(zsyh["p_change"].dropna()))
    plt.show(sns.distplot(zsyh["p_change"].dropna()))
def explain_group(parent):
    """Scheduler explain CLI group."""

    def _print_frame(df):
        """Prints dataframe."""
        if not df.empty:
            pd.set_option('display.max_rows', None)
            pd.set_option('float_format', lambda f: '%f' % f)
            pd.set_option('expand_frame_repr', False)
            print(df.to_string(index=False))

    @parent.group()
    def explain():
        """Explain scheduler internals"""
        pass

    @explain.command()
    @click.option('--instance', help='Application instance')
    @click.option('--partition', help='Cell partition', default='_default')
    @cli.admin.ON_EXCEPTIONS
    def queue(instance, partition):
        """Explain the application queue"""
        cell_master = make_readonly_master()
        frame = reports.explain_queue(cell_master.cell,
                                      partition,
                                      pattern=instance)
        _print_frame(frame)

    @explain.command()
    @click.argument('instance')
    @click.option('--mode', help='Tree traversal method',
                  type=click.Choice(reports.WALKS.keys()), default='default')
    @cli.admin.ON_EXCEPTIONS
    def placement(instance, mode):
        """Explain application placement"""
        cell_master = make_readonly_master()

        if instance not in cell_master.cell.apps:
            cli.bad_exit('Instance not found.')

        app = cell_master.cell.apps[instance]
        if app.server:
            cli.bad_exit('Instance already placed on %s' % app.server)

        frame = reports.explain_placement(cell_master.cell, app, mode)
        _print_frame(frame)

    del queue
    del placement