The following 50 code examples, extracted from open-source Python projects, illustrate how to use pandas.ExcelWriter().
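Before the project examples, here is a minimal sketch of the basic pattern (the file name and frames are placeholders, not taken from any project below): create a writer, pass it to DataFrame.to_excel once per sheet, and let the context manager save the workbook.

import pandas as pd

df1 = pd.DataFrame({'a': [1, 2, 3]})
df2 = pd.DataFrame({'b': [4, 5, 6]})

# the context manager saves the workbook when the block exits
with pd.ExcelWriter('example.xlsx', engine='xlsxwriter') as writer:
    df1.to_excel(writer, sheet_name='first', index=False)
    df2.to_excel(writer, sheet_name='second', index=False)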
def write_data(self, file_descr):
    """
    Use dataframe to_excel to write into file_descr (filename)
    - open first if file exists.
    """
    if os.path.isfile(file_descr):
        print(file_descr, 'exists')
        # Solution to keep existing data
        book = load_workbook(file_descr)
        writer = pd.ExcelWriter(file_descr, engine='openpyxl')
        writer.book = book
        writer.sheets = dict((ws.title, ws) for ws in book.worksheets)
        self.data_df.to_excel(writer, sheet_name='Mongo_Schema',
                              index=True, float_format='%.2f')
        writer.save()
    else:
        self.data_df.to_excel(file_descr, sheet_name='Mongo_Schema',
                              index=True, float_format='%.2f')

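The writer.book / writer.sheets assignments above were a common workaround for appending to an existing workbook in older pandas releases. Since pandas 0.24 the openpyxl engine supports this directly through mode='a'; a minimal sketch, assuming the workbook already exists (the file name is a placeholder):

import pandas as pd

df = pd.DataFrame({'a': [1, 2]})

# append a new sheet to an existing workbook instead of overwriting it
with pd.ExcelWriter('existing.xlsx', engine='openpyxl', mode='a') as writer:
    df.to_excel(writer, sheet_name='Mongo_Schema', index=True, float_format='%.2f')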
def write_excel(filename, **kwargs):
    """Write data tables to an Excel file, using kwarg names as sheet names.

    Parameters
    ----------
    filename : str
        The filename to write to.
    kwargs : dict
        Mapping from sheet names to data.
    """
    writer = pd.ExcelWriter(filename)
    for sheet_name, obj in kwargs.items():
        if isinstance(obj, dict):
            obj = _params_dict_to_dataframe(obj)
        if isinstance(obj, pd.DataFrame):
            obj.to_excel(writer, sheet_name=sheet_name)
    writer.save()
    writer.close()

def write_frame(f, excel_writer, to_excel_args=None):
    """
    Write a Pandas DataFrame to excel by calling to_excel, returning an XLMap
    that can be used to determine the position of parts of f, using pandas
    indexing.

    Parameters
    ----------
    f : DataFrame
        Frame to write to excel
    excel_writer : str or ExcelWriter
        Path or existing Excel Writer to use to write frame
    to_excel_args : dict
        Additional arguments to pass to DataFrame.to_excel, see docs for
        DataFrame.to_excel

    Returns
    -------
    XLMap :
        Mapping that corresponds to the position in the spreadsheet
        that frame was written to.
    """
    # guard against the default None, which cannot be unpacked with **
    if to_excel_args is None:
        to_excel_args = {}
    xlf = XLDataFrame(f)
    return xlf.to_excel(excel_writer, **to_excel_args)

def writeExcelOutput(self, table):
    if isinstance(table, QTabWidget):
        filename = table.currentWidget().model()._filename
        writer = pd.ExcelWriter(filename, engine='xlsxwriter')
        for i in range(table.count()):
            data = table.widget(i)
            sheetname = table.tabText(i)
            data.model()._df.to_excel(writer, sheet_name=sheetname, index=False)
            data.model()._dirty = False
        writer.save()
    if isinstance(table, QTableView):
        filename = table.model()._filename
        writer = pd.ExcelWriter(filename, engine='xlsxwriter')
        table.model()._df.to_excel(writer, sheet_name='Sheet 1', index=False)
        table.model()._dirty = False
        writer.save()

def export_pingan_trust16():
    issuers = pd.read_excel('../peace/??????.xlsx', sheetname=[0], header=0)[0]
    writer = pd.ExcelWriter('time_searies.xlsx')
    for company in issuers['name']:
        print(company)
        query = db.pingan_total.find({"name": company}, {'_id': 0, 'name': 0}).sort("rptDate", 1)
        data = pd.DataFrame(list(query))
        data = data[['??', '??', '??', '??', '??', '??', '??', '??', 'rptDate']]
        data = data.rename(columns={"??": "???????", "??": "????",
                                    "??": "?????", "??": "???????"})
        data.to_excel(writer, sheet_name=company)
    writer.save()

#db = client.companies
#collection = db.total_nums
#insert_record = json.loads(result.to_json(orient='records'))
#ret = db.total_nums.insert_many(insert_record)
# ????
#collection = db.
#df = sued_in_arrears()
#insert_record = json.loads(df.to_json(orient='records'))
#ret = db.collection.insert_many(insert_record)

def gen_decile_table(model_top_precision, valid_df):
    """ Creates the decile table """
    valid_top_precision_df = valid_df[valid_df.model_id == model_top_precision]
    valid_dec_sort_df = valid_top_precision_df.sort_values('y_pred_proba', ascending=False)
    total_breaks = valid_dec_sort_df.y_true.sum()
    total_blocks = len(valid_dec_sort_df)
    dec_breaks = total_breaks / 10
    # integer block size, so it can be used in the slices below
    dec_blocks = total_blocks // 10
    decile_df = pd.DataFrame(columns=('model_id', 'Decile', 'No_of_blocks', 'risk_mul',
                                      'Actual_breaks', 'Precision_in_decile',
                                      'Recall_overall', 'Lift_above_random'))
    for i in range(10):
        break_sum = valid_dec_sort_df.y_true[i*dec_blocks:(i+1)*dec_blocks].sum()
        risk_mul = valid_dec_sort_df.y_pred_proba[i*dec_blocks:(i+1)*dec_blocks].sum()
        lift = break_sum / dec_breaks
        conversion = break_sum * 100 / dec_blocks
        recall = break_sum * 100 / total_breaks
        decile_df.loc[len(decile_df)] = [model_top_precision, i+1, dec_blocks, risk_mul,
                                         break_sum, conversion, recall, lift]
    decile_df.loc[len(decile_df)] = ['-', 'Total', total_blocks, '_',
                                     total_breaks, total_breaks/total_blocks, '-', '-']
    writer = pd.ExcelWriter('decile_table.xlsx', engine='xlsxwriter')
    decile_df.to_excel(writer, sheet_name='Sheet1')
    # persist the workbook to disk
    writer.save()

def save_performance(self, *args):
    w = pd.ExcelWriter("performance&%s.xls" % datetime.now().strftime("%Y-%m-%d-%H-%M-%S"))

    def iter_save(dict_like, name=None):
        for key, data in dict_like.items():
            table = key if not name else name + "_" + key
            if isinstance(data, dict):
                iter_save(data, key)
                continue
            elif isinstance(data, pd.Series):
                data = pd.DataFrame(data)
            try:
                data.to_excel(w, table)
            except Exception as e:
                print(e.message)
                print("%s can not be saved as .xls file" % table)
                print(data)

    iter_save(self.output(*args))
    w.save()

def merge_gender_data(**kwargs):
    pni = kwargs[fpn.PN_INPUT]
    # get index from source data dict
    df = pd.DataFrame(index=pni.data_dict.keys())
    for k, v in kwargs.items():
        if k not in fpn.PIPE_NODE_KWARGS:
            for gender in ('M', 'F'):
                df[k + '_' + gender] = v[gender]
    return df

#@fpn.pipe_node
#def write_xlsx(**kwargs):
    #pni = kwargs[fpn.PN_INPUT]
    #xlsx_fp = os.path.join(pni.output_dir, 'output.xlsx')
    #xlsx = pd.ExcelWriter(xlsx_fp)
    #for k, df in pni.store_items():
        #df.to_excel(xlsx, k)
    #xlsx.save()
    #return xlsx_fp

def approach_pipe_4b():
    a = (PN4.name_count_per_year(lambda n: n.lower().startswith('lesl'))
         | PN4.percent
         | fpn.store('lesl'))
    b = (PN4.name_count_per_year(lambda n: n.lower().startswith('dana'))
         | PN4.percent
         | fpn.store('dana'))
    f = (PN4.merge_gender_data(lesl=a, dana=b)
         | PN4.year_range(1920, 2000)
         | fpn.store('merged') * 100
         | PN4.plot('gender.png')
         | PN4.open_plot)
    pni = PN4.PNI('/tmp')
    f[pni]
    xlsx_fp = os.path.join(pni.output_dir, 'output.xlsx')
    xlsx = pd.ExcelWriter(xlsx_fp)
    for k, df in pni.store_items():
        df.to_excel(xlsx, k)
    xlsx.save()
    os.system('libreoffice --calc ' + xlsx_fp)

def GenerateXLSX(X, which=1):
    columns = []
    # generate column names
    for dimension in ['White', 'Black', 'Player', 'Opponent', 'Empty']:
        for i in range(1, 9):
            for char in 'abcdefgh':
                position = 'Position: ' + char + str(i) + ' (' + dimension + ')'
                columns.append(position)
    columns.append('White\'s Move Preceded This State')
    columns.append('Outcome')
    frame = pd.DataFrame(X, columns=columns)
    if which == 1:
        writer = pd.ExcelWriter(
            r'/Users/TeofiloZosa/PycharmProjects/BreakthroughANN/value_net_rank_binary/NPDataSets/WBPOE/UnshuffledBinaryFeaturePlanesDataset1.xlsx',
            engine='xlsxwriter')
    else:
        writer = pd.ExcelWriter(
            r'/Users/TeofiloZosa/PycharmProjects/BreakthroughANN/value_net_rank_binary/NPDataSets/WBPOE/UnshuffledBinaryFeaturePlanesDataset2.xlsx',
            engine='xlsxwriter')
    frame.to_excel(writer, 'Sheet1')
    writer.save()

def parse_statistics(logfile):
    xl = pd.ExcelFile(logfile)
    df = xl.parse("Sheet")
    df = df.sort_values(by='Line Numbers')
    writer = pd.ExcelWriter(logfile)
    df.to_excel(writer, sheet_name='Sheet', index=False)
    writer.save()
    wb = openpyxl.load_workbook(logfile)
    ws = wb.active
    row_count = ws.max_row
    column_count = ws.max_column
    chart = ScatterChart()
    chart.title = "Time upload domain names"
    chart.style = 13
    chart.x_axis.title = "Line numbers"
    chart.y_axis.title = "Time, sec"
    xvalues = Reference(ws, min_col=1, min_row=2, max_row=row_count)
    color_choice = ['3F888F', 'D24D57']
    for i in range(2, column_count + 1):
        values = Reference(ws, min_col=i, min_row=1, max_row=row_count)
        series = Series(values, xvalues, title_from_data=True)
        series.marker.symbol = "diamond"
        series.graphicalProperties.line.solidFill = color_choice[i-2]
        series.marker.graphicalProperties.line.solidFill = color_choice[i-2]
        series.marker.graphicalProperties.solidFill = color_choice[i-2]
        series.graphicalProperties.line.width = 20000
        chart.series.append(series)
    chart.legend.legendPos = 'b'
    ws.add_chart(chart)
    wb.save(logfile)

def compare_securities_2x2(sec_list, weeks, thresh=0.0):
    """
    Returns an excel sheet with stock name, this week's percentage change,
    mean of next week's predicted percentage change, and standard deviation
    of next week's predicted percentage change

    :param sec_list: <list> with all the security names
    :param weeks: <int> Number of weeks since the most recent recorded date
        (cannot use years/months because months and years have varying
        quantities of days; Numpy requires constancy in datetime arithmetic)
    :param thresh: <float> divides percentage changes into two categories
        (>= and <); applies to each security
    """
    sec_dict = {}
    for name in sec_list:
        sec_info = predict_percentage_change(name, weeks=weeks, threshold=thresh)
        sec_dict[name] = sec_info
    sec_df = pd.DataFrame(sec_dict).transpose()
    sec_df.columns = ['Last % Change', "Mean Predicted % Change",
                      "Standard Deviation Predicted % Change"]
    sec_df = sec_df.sort_values(by=["Mean Predicted % Change"], ascending=True)
    writer = pd.ExcelWriter('output.xlsx')
    sec_df.to_excel(writer, 'Sheet1')
    writer.save()

#compare_securities_2x2(["BAC", "AAPL", "GOOG", "T"], weeks=26, thresh=2.0)

def save_xls(self, dframe):
    # if the excel file already exists, append a new sheet to it
    xls_path = os.path.join(current_folder, self.name + '.xlsx')
    if os.path.exists(xls_path):
        # the excel file already exists
        book = load_workbook(xls_path)
        writer = pd.ExcelWriter(xls_path, engine='openpyxl')
        writer.book = book
        writer.sheets = dict((ws.title, ws) for ws in book.worksheets)
        dframe.to_excel(writer, self.name)
        writer.save()
    else:
        # create a new file
        writer = ExcelWriter(xls_path)
        dframe.to_excel(writer, self.name)
        writer.save()

def anon_pay_table(case, proportional=True, mult=1.0):
    '''Anonymize the "rates" worksheet of the "pay_tables.xlsx" input file.

    The rates may be proportionally adjusted (larger or smaller) or
    disproportionally adjusted with a fixed algorithm.

    A copy of the original excel file is saved as "pay_tables_orig.xlsx".
    All modifications are inplace.

    inputs
        case (string)
            the case name
        proportional (boolean)
            if True, use the mult input to increase or decrease all of the
            "rates" worksheet pay data proportionally.  If False, use a
            fixed algorithm to disproportionally adjust the pay rates.
        mult (integer or float)
            if the proportional input is True, multiply all pay rate values
            by this input value
    '''
    inplace = True
    path, d = copy_excel_file(case, 'pay_tables', return_path_and_df=True)
    df = d['rates']
    anon_pay(df, proportional=proportional, mult=mult, inplace=inplace)
    d['rates'] = df
    with pd.ExcelWriter(path, engine='xlsxwriter') as writer:
        for ws_name, df_sheet in d.items():
            df_sheet.to_excel(writer, sheet_name=ws_name)
    print('\nanon_pay_table routine complete')

def save_excel():
    df = ts.get_today_all()
    df2 = ts.get_hist_data('300333')
    # to_excel with a bare filename rewrites the whole file on each call,
    # so route both frames through a single ExcelWriter instead
    writer = pd.ExcelWriter('1.xls')
    df.to_excel(writer, sheet_name='all_stock')
    df2.to_excel(writer, sheet_name='basic_info')
    writer.save()

def report(simulation, filename='report.xls'):
    """
    This function generates a report as an excel sheet.

    simulation
        the simulation that should be exported to excel
    filename
        filename of the excel file
    """
    writer = pd.ExcelWriter(filename)
    for account in simulation.accounts:
        df = account.report.as_df()
        df.to_excel(writer, sheet_name=account.name)
    writer.save()

def to_excel(self, filename='myfile.xlsx'):
    """Export information to an Excel file

    Kargs:
        filename: string
            Name of the excel file ex: filename='myfile.xlsx'
    """
    writer = ExcelWriter(filename)
    self.clfinfo.to_excel(writer, 'Classifier')
    self.statinfo.to_excel(writer, 'Statistics')
    try:
        self.featinfo.to_excel(writer, 'Features')
    except:
        warn('Information about features has been ignored. Run fit()')
    writer.save()

def close(self, format='csv'):
    import pandas as pd
    if format == 'csv':
        pd.DataFrame.from_dict(self._data, orient='index').to_csv(self._file)
    elif format == 'json':
        pd.DataFrame.from_dict(self._data, orient='index').to_json(self._file)
    else:
        writer = pd.ExcelWriter(self._file)
        pd.DataFrame.from_dict(self._data, orient='index').to_excel(writer)
        writer.save()

def main(workspace, path, name):
    table_name = ['accessPoints', 'ProbeRequests', 'ProbeRequests', 'EAP',
                  'Hidden_SSID', 'inscope_accessPoints',
                  'inscope_ProbeRequests', 'inscope_ProbeResponses']
    sheet_name = ['AccessPoints', 'ProbeRequests', 'ProbeRequests', 'EAP',
                  'Hidden_SSID', 'Inscope_AccessPoints',
                  'Inscope_ProbeRequests', 'Inscope_ProbeResponses']
    ws = workspace
    q = queries()
    ws1 = q.db_connect(ws)
    writer = dp.ExcelWriter(path+name+'.xlsx', engine='xlsxwriter')
    j = 0
    print "Exporting: "+path+name+'.xlsx'
    for tbn in table_name:
        try:
            td = dp.read_sql('select * from '+tbn+'', ws1)
            if td.empty:
                print colors.RD + "[-]" + colors.NRM + " Skipping: " + sheet_name[j] + ". No Data in table."
            else:
                td.to_excel(writer, sheet_name=sheet_name[j], index=False)
                print colors.GRN + "[+]" + colors.NRM + " Exporting: " + sheet_name[j] + "."
            # advance to the next sheet name after reporting on the current one
            j += 1
        except ValueError:
            continue
        except pandas.io.sql.DatabaseError:
            continue
    writer.save()
    print "Export Completed"

def to_xls(self, module_name, file_name):
    '''
    Takes the file specified by module and file names and writes an xls in
    the same directory with the same name (changing the file extension).

    Columns of the original file will be written in the first sheet.
    Columns containing "__" will be written in the second sheet.

    Use for download only!

    INPUT:
        - module_name:
        - file_name:
    '''
    raise DeprecationWarning('Excel download currently not supported due '
                             'to potential memory issues with large files')

    file_path = self.path_to(module_name, file_name)
    assert file_name[-4:] == '.csv'
    new_file_name = file_name[:-4] + '.xlsx'
    new_file_path = self.path_to(module_name, new_file_name)
    tab = pd.read_csv(file_path, encoding='utf-8', dtype=str)
    columns_og = [x for x in tab.columns if '__' not in x]
    columns_new = [x for x in tab.columns if '__' in x]
    writer = pd.ExcelWriter(new_file_path)
    tab[columns_og].to_excel(writer, 'original_file', index=False)
    tab[columns_new].to_excel(writer, 'normalization', index=False)
    writer.save()
    return new_file_name

def export_to(self, file_path, batchsize=1000):
    self.xls_writer = ExcelWriter(file_path)

    # get record count
    record_count = self._query_mongo(count=True)

    # query in batches and for each batch create an XLSDataFrameWriter and
    # write to existing xls_writer object
    start = 0
    header = True
    while start < record_count:
        cursor = self._query_mongo(self.filter_query, start=start, limit=batchsize)
        data = self._format_for_dataframe(cursor)
        # write all cursor's data to their respective sheets
        for section_name, section in self.sections.iteritems():
            records = data[section_name]
            # TODO: currently ignoring nested repeats
            # so ignore sections that have 0 records
            if len(records) > 0:
                # use a different group delimiter if needed
                columns = section["columns"]
                if self.group_delimiter != DEFAULT_GROUP_DELIMITER:
                    columns = [self.group_delimiter.join(col.split("/"))
                               for col in columns]
                columns = columns + self.EXTRA_COLUMNS
                writer = XLSDataFrameWriter(records, columns)
                writer.write_to_excel(self.xls_writer, section_name,
                                      header=header, index=False)
        header = False
        # increment counter(s)
        start += batchsize
        time.sleep(0.1)
    self.xls_writer.save()

def setUpClass(cls):
    cls.f = test_frame
    cls.writer = pd.ExcelWriter(path_for('charts', cls.__name__),
                                engine=cls.to_excel_args['engine'])
    cls.xlmap = cls.f.to_excel(cls.writer, **cls.to_excel_args)
    cls.workbook = cls.xlmap.writer.book
    return cls

def export_pingan_trust16():
    issuers = pd.read_excel('../peace/??????.xlsx', sheetname=[0], header=0)[0]
    writer = pd.ExcelWriter('time_searies_all.xlsx')
    for company in issuers['name']:
        print(company)
        query = db.pingan_total.find({"name": company}, {'_id': 0, 'name': 0}).sort("rptDate", 1)
        data = pd.DataFrame(list(query))
        data = data[['??', '??', '??', '??', '??', '??', '??', '??', 'rptDate']]
        data = data.rename(columns={"??": "???????", "??": "????",
                                    "??": "?????", "??": "???????"})
        data.to_excel(writer, sheet_name=company)
    writer.save()

def process():
    df = pd.read_excel(DATA_FILE, sheetname=[1], header=0, index_col=0,
                       convert_float=False)[1]
    # df0 = df[1].fillna(0)
    # df = df.head(100)
    # resultList = np.zeros(df.index.size)
    resultList = []
    idList = []
    for i in range(df.index.size):
        for nan in range(1, 40):
            if math.isnan(df.values[i][nan]):
                # print(df[1].values[i])
                df.values[i][40] = np.nan
                break
        if nan < 39:
            # for nan results
            idList.append(df.index[i])
            resultList.append([df.values[i][0], 0])
            print("%s:%s" % (df.index[i], df.values[i][0]))
            continue
        ID = df.index[i]
        data = df.values[i][1:40].reshape(13, 3)
        # handle nulls
        # print(data)
        industryScore = calcIndustry(ID, data)
        trendScore = calcTrend(ID, data)
        fluncScore = calcFluctuation(ID, data)
        a = np.append(industryScore, trendScore)
        b = np.append(a, fluncScore)
        idList.append(ID)
        resultList.append([df.values[i][0], calcTotal(b)])
        # print("%s:%f" % (ID, resultList[i]))
    resultdf = pd.DataFrame(resultList, idList, columns=['NAME', 'Score'])
    with pd.ExcelWriter('result.xls') as writer:
        resultdf.to_excel(writer, sheet_name=str(0))

def tasks_to_spreadsheet(tasks, filepath):
    import pandas
    df_tasks = pandas.DataFrame.from_records([
        task.to_dict() for task in tasks
    ])
    resources = set(resource for task in tasks for resource in task.resources)
    df_resources = pandas.DataFrame.from_records([
        resource.to_dict() for resource in resources
    ])
    with pandas.ExcelWriter(filepath, engine='xlsxwriter') as writer:
        df_tasks.to_excel(writer, sheet_name='tasks', index=False)
        df_resources.to_excel(writer, sheet_name='resources', index=False)

def main(argv):
    parser = argparse.ArgumentParser()
    parser.add_argument('--inputdir', help='Input directory to be converted', required=True)
    parser.add_argument('--outdir', help='Output directory', required=True)
    parser.add_argument('--cols', help='Columns to include', default=DEFAULT_COLUMNS)
    parser.add_argument('--sortby', help='Row that is going to be used for sorting',
                        default='prediced_score')
    parser.add_argument('--ascending', help='Sort in ascending order (def. False)', default=False)
    parser.add_argument('--newcols', help='Columns to be added', default=['LABEL'])
    args = parser.parse_args(argv)

    # Get a list of files to be converted
    filenames = glob.glob('data/output/*.csv')
    for filename in filenames:
        # Skip existing files
        outputfile = os.path.join(args.outdir,
                                  '.'.join(os.path.basename(filename).split('.')[:1]) + '.xls')
        if os.path.exists(outputfile):
            continue
        df = pd.read_csv(filename)
        # Sort the data
        df.sort(args.sortby, ascending=args.ascending, inplace=True)
        # Drop columns that we dont need
        selected_cols = args.cols.split(' ')
        for col in df.columns.tolist():
            if selected_cols.count(col) == 0:
                df.drop(col, axis=1, inplace=True)
        # Add new cols
        for newcol in args.newcols:
            df[newcol] = ''
        # Store file
        outputfile = os.path.join(args.outdir,
                                  '.'.join(os.path.basename(filename).split('.')[:1]) + '.xls')
        if not os.path.exists(os.path.dirname(outputfile)):
            os.makedirs(os.path.dirname(outputfile))
        writer = pd.ExcelWriter(outputfile, engine='xlsxwriter')
        df.to_excel(writer, sheet_name='Sheet1')
        writer.save()
        print('Wrote a new excel file: %s' % outputfile)

def _save_origin(self, path):
    # if not self.initialized:
    #     raise ValueError("trader not initialized, no data to perform")
    writer = ExcelWriter(path, encoding="utf-8")
    pd.DataFrame(self.performance.equity).to_excel(writer, "??")
    self.performance.order_details.to_excel(writer, "??")
    writer.save()

def export(df, table, export_type='excel', target_path=None, if_exists='replace', suffix=None):
    if target_path:
        if export_type == 'excel' and not suffix:
            suffix = 'xlsx'
        target_file = os.path.join(target_path,
                                   table + '-' + str(datetime.date.today())) + '.' + str(suffix or export_type)
        if if_exists == 'replace' and os.path.exists(target_file):
            os.remove(target_file)
        export_io = target_file
    else:
        export_io = BytesIO()
    if export_type == 'excel':
        writer = pd.ExcelWriter(export_io, engine='xlsxwriter')
        df.to_excel(writer, index=False)
        writer.save()
    elif export_type == 'csv':
        export_io = BytesIO(df.to_csv(target_path, index=False, chunksize=4096).encode())
    elif export_type == 'json':
        export_io = BytesIO(df.to_json(target_path, orient='records').encode())
    elif export_type == 'pickle':
        pkl.dump(df, export_io, protocol=pkl.HIGHEST_PROTOCOL)
    else:
        raise NotImplementedError("export type {} is not supported".format(export_type))
    return export_io, table + '.' + str(suffix or export_type)

def write(self, if_exists: str = None):
    #from pyexcelerate import Workbook
    self._logger.info("Writing rows to storage")
    #wb = Workbook()
    #wb.new_sheet("Sheet1", data=self.data)
    #wb.save("{}.xlsx".format(os.path.join(self._db_dir, self.table)))
    writer = pd.ExcelWriter("{}.xlsx".format(os.path.join(self._db_dir, self.table)),
                            engine='xlsxwriter')
    self.data.to_excel(writer)
    writer.save()
    self._logger.info("Finished writing rows to storage")

def to_excel(self, filename, predictColumns=[], statsColumns=[], models=[]):
    '''Export model predictions and statistics to an Excel workbook with one
    worksheet for each model. Preferred method of creating readable output.

    Input:
        filename (str) = name of Excel workbook to create
        predictColumns (list) = labels from the pandas dataframes to override
            automatic alphabetization of all dataframe labels (default behavior)
        statsColumns (list) = labels from the stats pandas dataframes to
            override write (same behavior as predictColumns)
        models (list) = models to export; if [], to_excel writes all
            predicted models by default

    Output:
        An Excel workbook with model predictions and statistics.'''
    assert isinstance(filename, str), \
        "Filename provided, {}, for export() needs to be a string.".format(filename)
    if not models:
        models = self.predictions.keys()
    else:
        for m in models:
            assert m in self.predictions.keys(), "Model {} was not tested.".format(m)
    # append the extension only when it is not already present
    if filename.endswith(".xlsx"):
        fn = filename
    else:
        fn = filename + ".xlsx"
    writer = pandas.ExcelWriter(fn)
    if predictColumns:
        for model in models:
            self.predictions[model].to_excel(writer, sheet_name=model,
                                             columns=predictColumns)
    if statsColumns:
        for model in models:
            if model in self.statistics.keys():
                self.statistics[model].to_excel(writer,
                                                sheet_name="{}-stats".format(model),
                                                columns=statsColumns)
            else:
                print "Functional form for {} was not specified. Not writing stats.".format(model)
    writer.save()

def main(path_to_data, column_to_deal, output_file, input_ncores):
    ''' set up logging '''
    logger = logging.getLogger('mylogger')
    logger.setLevel(logging.INFO)
    console = logging.StreamHandler()
    console.setLevel(logging.INFO)
    formatter = logging.Formatter('[%(levelname)-3s]%(asctime)s %(filename)s[line:%(lineno)d]:%(message)s')
    console.setFormatter(formatter)
    logger.addHandler(console)

    ''' load and process the data '''
    data = pd.read_excel(path_to_data)
    logger.info("cleaning text...")
    re_sub_vec = np.vectorize(re_sub)  # vectorized regex substitution
    data[column_to_deal] = re_sub_vec(data[column_to_deal])
    logger.info("splitting sentences and segmenting words...")
    data['content_list'] = data[column_to_deal].map(sentence_split)
    seg_word = jieba4null(n_core=input_ncores)
    data.loc[:, 'seg_words'] = data['content_list'].map(seg_word.cut_sentence)
    logger.info("classifying sentiment...")
    pool = Pool(input_ncores)
    worker = polar_classifier()
    data['sentiment'] = pool.map(worker.multi_list_classify, data['seg_words'])
    data = data.drop(['content_list', 'seg_words'], axis=1)
    logger.info("writing results to excel...")
    writer = pd.ExcelWriter(output_file)
    data.to_excel(writer, sheet_name='sheet1', encoding='utf-8', index=False)
    writer.save()
    logger.info("Task done!")

def _export_xlsx(self):
    try:
        import openpyxl
    except ImportError:
        msg = QtWidgets.QMessageBox()
        msg.setIcon(QtWidgets.QMessageBox.Critical)
        msg.setText("Feature Not Available")
        msg.setInformativeText("The Python package openpyxl must be "
                               "installed to enable Excel export. Use "
                               "CSV export instead.")
        msg.setWindowTitle("Error")
        msg.exec_()
    else:
        from pandas import ExcelWriter
        fp, _ = QtWidgets.QFileDialog.getSaveFileName(self.widget, 'Export XLSX')
        if not fp:
            return
        # Write each event stream to a different spreadsheet in one
        # Excel document.
        writer = ExcelWriter(fp)
        tables = {d['name']: self._db.get_table(self._header, stream_name=d['name'])
                  for d in self._header.descriptors}
        for name, df in tables.items():
            df.to_excel(writer, name)
        writer.save()

def generate_report(result_dict, target_report_csv_path):
    from six import StringIO

    output_path = os.path.join(target_report_csv_path, result_dict["summary"]["strategy_name"])
    try:
        os.mkdir(output_path)
    except:
        pass

    xlsx_writer = pd.ExcelWriter(os.path.join(output_path, "report.xlsx"), engine='xlsxwriter')

    # summary.csv
    csv_txt = StringIO()
    summary = result_dict["summary"]
    csv_txt.write(u"\n".join(sorted("{},{}".format(key, value)
                                    for key, value in six.iteritems(summary))))
    df = pd.DataFrame(data=[{"val": val} for val in summary.values()],
                      index=summary.keys()).sort_index()
    df.to_excel(xlsx_writer, sheet_name="summary")
    with open(os.path.join(output_path, "summary.csv"), 'w') as csvfile:
        csvfile.write(csv_txt.getvalue())

    for name in ["total_portfolios", "stock_portfolios", "future_portfolios",
                 "stock_positions", "future_positions", "trades"]:
        try:
            df = result_dict[name]
        except KeyError:
            continue
        # replace all dates in the dataframe with strings
        if df.index.name == "date":
            df = df.reset_index()
            df["date"] = df["date"].apply(lambda x: x.strftime("%Y-%m-%d"))
            df = df.set_index("date")
        csv_txt = StringIO()
        csv_txt.write(df.to_csv(encoding='utf-8'))
        df.to_excel(xlsx_writer, sheet_name=name)
        with open(os.path.join(output_path, "{}.csv".format(name)), 'w') as csvfile:
            csvfile.write(csv_txt.getvalue())

    # report.xlsx <--- all tables above, one sheet each
    xlsx_writer.save()

def add_editor_list_to_excel(case=None):
    '''save editor tool list order to the excel input file, "proposals.xlsx".

    The list order will be saved to a new worksheet, "edit".

    Subsequent saved lists will overwrite previous worksheets.  Change the
    worksheet name of previously saved worksheets from "edit" to something
    else prior to running this function if they are to be preserved within
    the workbook.

    The routine reads the case_dill.pkl file - this provides a write path
    to the correct case study folder and excel "proposals.xlsx" file.  Then
    the routine reads the editor-produced p_new_order.pkl file and writes
    it to the new worksheet "edit" in the proposals.xlsx file.

    input
        case (string)
            The case study name (and consequently, the write file path).
            This variable will default to the stored case study name
            contained within the "dill/case_dill.pkl" file if no input is
            supplied by the user.
    '''
    if not case:
        try:
            case = pd.read_pickle('dill/case_dill.pkl').case.value
        except OSError:
            print('case variable not found,',
                  'tried to find it in "dill/case_dill.pkl"',
                  'without success\n')
            return
    xl_str = 'excel/' + case + '/proposals.xlsx'
    df = pd.read_pickle('dill/p_new_order.pkl')
    df = df.reset_index()[['empkey']]
    df.index = df.index + 1
    df.index.name = 'order'
    ws_dict = pd.read_excel(xl_str, index_col=0, sheetname=None)
    ws_dict['edit'] = df
    with pd.ExcelWriter(xl_str, engine='xlsxwriter') as writer:
        for ws_name, df_sheet in ws_dict.items():
            df_sheet.to_excel(writer, sheet_name=ws_name)

# Pretty print a dictionary...

def update_excel(case, file, ws_dict={}, sheets_to_remove=None):
    '''Read an excel file, optionally remove worksheet(s), add worksheets
    or overwrite worksheets with a dictionary of ws_name, dataframe
    key, value pairs, and write the excel file back to disk

    inputs
        case (string)
            the data model case name
        file (string)
            the excel file name without the .xlsx extension
        ws_dict (dictionary)
            dictionary of worksheet names as keys and pandas dataframes as
            values.  The items in this dictionary will be passed into the
            excel file as worksheets.  The worksheet name keys may be the
            same as some or all of the worksheet names in the excel file.
            In the case of matching names, the data from the input dict
            will overwrite the existing data (worksheet) in the excel file.
            Non-overlapping worksheet names/dataframe values will be added
            as new worksheets.
        sheets_to_remove (list)
            a list of worksheet names (strings) representing worksheets to
            remove from the excel workbook.  It is not necessary to remove
            sheets which are being replaced by worksheets with the same
            name.
    '''
    # read a single or multi-sheet excel file
    # (returns dict of sheetname(s), dataframe(s))
    path = 'excel/' + case + '/' + file + '.xlsx'

    # make a copy of file before modifying
    copy_excel_file(case, file, verbose=False)

    # get a dictionary from the excel file consisting of worksheet name keys
    # and worksheet contents as values (as dataframes)
    try:
        dict0 = pd.read_excel(path, sheetname=None)
    except OSError:
        print('Error: Unable to find "' + path + '"')
        return

    # all worksheets are now accessible as dataframes.
    # drop worksheets which match an element in the sheets_to_remove:
    if sheets_to_remove is not None:
        for ws_name in sheets_to_remove:
            dict0.pop(ws_name, None)

    # update worksheet dictionary with ws_dict (ws_dict values will override
    # existing values in the case of matching worksheet name keys):
    dict0.update(ws_dict)

    # write the updated dictionary back to excel...
    with pd.ExcelWriter(path, engine='xlsxwriter',
                        datetime_format='yyyy-mm-dd',
                        date_format='yyyy-mm-dd') as writer:
        for sheet_name, df_sheet in dict0.items():
            df_sheet.to_excel(writer, sheet_name=sheet_name)

def get(self, request):
    user = request.user
    bytes_io = io.BytesIO()
    writer = pd.ExcelWriter(bytes_io, engine='xlsxwriter',
                            options={'remove_timezone': True})

    # supplement events
    supplement_events_worksheet_name = 'SupplementEvents'
    supplement_events = SupplementLog.objects.filter(user=user)
    df_builder = SupplementEventsDataframeBuilder(supplement_events)
    supplement_events_df = df_builder.get_flat_daily_dataframe()
    self._write_to_workbook(writer, supplement_events_df, supplement_events_worksheet_name)

    # sleep events
    sleep_activities_worksheet_name = 'SleepActivities'
    sleep_activities = SleepLog.objects.filter(user=user)
    df_builder = SleepActivityDataframeBuilder(sleep_activities)
    sleep_activities_series = df_builder.get_sleep_history_series()
    self._write_to_workbook(writer, sleep_activities_series, sleep_activities_worksheet_name)

    # user activity events
    user_activity_events_sheet_name = 'UserActivityEvents'
    user_activity_events = UserActivityLog.objects.filter(user=user)
    df_builder = UserActivityEventDataframeBuilder(user_activity_events)
    user_activity_events_df = df_builder.get_flat_daily_dataframe()
    self._write_to_workbook(writer, user_activity_events_df, user_activity_events_sheet_name)

    # productivity logs
    productivity_log_sheet_name = 'DailyProductivityLog'
    productivity_log = DailyProductivityLog.objects.filter(user=user)
    df_builder = ProductivityLogEventsDataframeBuilder(productivity_log)
    # odd why this one isn't sorted the right way
    productivity_log_df = df_builder.get_flat_daily_dataframe().sort_index(ascending=True)
    self._write_to_workbook(writer, productivity_log_df, productivity_log_sheet_name)

    all_dataframes = [productivity_log_df, supplement_events_df, user_activity_events_df]
    concat_dataframe = pd.concat(all_dataframes, axis=1)

    # include sleep which is a series and not a dataframe
    cumulative_log_sheet_name = 'Aggregate Log'
    concat_dataframe[SLEEP_MINUTES_COLUMN] = sleep_activities_series
    self._write_to_workbook(writer, concat_dataframe, cumulative_log_sheet_name)

    cumulative_14_day_dataframe_sheet_name = 'Aggregate 14 Log'
    cumulative_14_day_dataframe = concat_dataframe.rolling(window=14, min_periods=1).sum()[14:]
    self._write_to_workbook(writer, cumulative_14_day_dataframe, cumulative_14_day_dataframe_sheet_name)

    cumulative_28_day_dataframe_sheet_name = 'Aggregate 28 Log'
    cumulative_28_day_dataframe = concat_dataframe.rolling(window=28, min_periods=1).sum()[28:]
    self._write_to_workbook(writer, cumulative_28_day_dataframe, cumulative_28_day_dataframe_sheet_name)

    # make sure all the output gets written to bytes io
    writer.close()

    # http response because we are providing data and not doing any template / rendering
    response = HttpResponse(
        bytes_io.getvalue(),
        content_type='application/vnd.openxmlformats-officedocument.spreadsheetml.sheet')
    response['Content-Disposition'] = 'attachment; filename=user_export_data.xlsx'
    return response

def road_runner(roadbox, exectime, savepath, timestep='sec',
                st=True, sm=True, bar=True, ownfun=None):
    print _tips.INFO('Process start...', 'GREEN')
    if ownfun is not None and bar is False:
        raise ValueError(_tips.INFO("ownfun isn't None while bar is False", 'RED'))
    if bar is True:
        loop = trange(exectime)
        info_d = loop.set_description
        info_p = loop.set_postfix
        info_r = loop.refresh
        info_w = loop.write
    else:
        loop = range(exectime)
        info_d = __empty
        info_p = __empty
        info_r = __empty
        info_w = __empty
    roadstbox = []
    summarydata = pd.DataFrame(KEY)
    tsdata = pd.DataFrame({'ROAD_HASH_ID': [], 'LANE_ID': [],
                           'TIME_STAMP': [], 'LOCATE': []})
    writer = pd.ExcelWriter(savepath + '.xlsx')
    for road in roadbox:
        rds = RoadStatus(road, timestep=timestep)
        roadstbox.append(rds)
    info_p(stdata=str(st), summarydata=str(sm))
    info_d(_tips.INFO('Collecting data', 'GREEN'))
    for i in loop:
        try:
            for road in roadbox:
                road.reflush_status()
            for stat in roadstbox:
                if sm is True:
                    temp = stat.summary()
                    if len(temp) != 0:
                        summarydata = summarydata.append(temp)
                if st is True:
                    temp = stat.get_time_space()
                    tsdata = tsdata.append(temp)
            if ownfun is not None:
                info_w(ownfun())
        except:
            info_d(_tips.INFO('FAILED!', 'RED'))
            info_r()
            raise KeyError
    print _tips.INFO('Start writing data...', 'BLUE')
    summarydata.to_excel(writer, 'SummaryData', index=False)
    tsdata.to_excel(writer, 'SpaceTimeData', index=False)
    writer.save()
    print _tips.INFO('Done', 'GREEN')

def make_minimal_neoepitope_report(
        ranked_variants_with_vaccine_peptides,
        num_epitopes_per_peptide=None,
        excel_report_path=None):
    """
    Creates a simple Excel spreadsheet containing one neoepitope per row

    Parameters
    ----------
    ranked_variants_with_vaccine_peptides :
        Ranked list of (variant, list of its vaccine peptides)

    num_epitopes_per_peptide : int
        The number of epitopes to include for each vaccine peptide; these
        are sorted before cutoff. If None, all epitopes will be included
        in the output

    excel_report_path : str
        Path to which to write the output Excel file
    """
    rows = []
    # each row in the spreadsheet is a neoepitope
    for (variant, vaccine_peptides) in ranked_variants_with_vaccine_peptides:
        for vaccine_peptide in vaccine_peptides:
            # only include mutant epitopes
            for epitope_prediction in vaccine_peptide.mutant_epitope_predictions:
                row = OrderedDict([
                    ('Allele', epitope_prediction.allele),
                    ('Mutant peptide sequence', epitope_prediction.peptide_sequence),
                    ('Score', vaccine_peptide.mutant_epitope_score),
                    ('Predicted mutant pMHC affinity',
                     '%.2f nM' % epitope_prediction.ic50),
                    ('Variant allele RNA read count',
                     vaccine_peptide.mutant_protein_fragment.n_alt_reads),
                    ('Wildtype sequence', epitope_prediction.wt_peptide_sequence),
                    ('Predicted wildtype pMHC affinity',
                     '%.2f nM' % epitope_prediction.wt_ic50),
                    ('Gene name', vaccine_peptide.mutant_protein_fragment.gene_name),
                    ('Genomic variant', variant.short_description),
                ])
                rows.append(row)
    if len(rows) > 0:
        df = pd.DataFrame.from_dict(rows)
        writer = pd.ExcelWriter(excel_report_path, engine='xlsxwriter')
        df.to_excel(writer, sheet_name='Neoepitopes', index=False)
        # resize columns to be not crappy
        worksheet = writer.sheets['Neoepitopes']
        worksheet.set_column('%s:%s' % ('B', 'B'), 23)
        worksheet.set_column('%s:%s' % ('D', 'D'), 27)
        worksheet.set_column('%s:%s' % ('E', 'E'), 26)
        worksheet.set_column('%s:%s' % ('F', 'F'), 17)
        worksheet.set_column('%s:%s' % ('G', 'G'), 30)
        worksheet.set_column('%s:%s' % ('H', 'H'), 9)
        worksheet.set_column('%s:%s' % ('I', 'I'), 18)
        writer.save()
        logger.info('Wrote XLSX neoepitope report file to %s', excel_report_path)

def to_excel(self, excel_writer, sheet_name='Sheet1', na_rep='',
             float_format=None, columns=None, header=True, index=True,
             index_label=None, startrow=0, startcol=0, engine=None,
             merge_cells=True, encoding=None, inf_rep='inf', verbose=True,
             **kwargs):
    """
    Monkeypatched DataFrame.to_excel by xl_link!

    Changes:
    --------

    Returns
    -------
    XLMap
        corresponding to position of frame as it appears in excel
        (see XLMap for details)

    See Also
    --------
    Pandas.DataFrame.to_excel for info on parameters

    Note
    ----
    When providing a path as excel_writer, the default engine used is
    'xlsxwriter'.  Because xlsxwriter workbooks can only be saved once,
    xl_link suppresses calling `excel_writer.save()`; as a result,
    `xlmap.writer.save()` should be called once no further changes are to
    be made to the spreadsheet.
    """
    if isinstance(excel_writer, pd.ExcelWriter):
        need_save = False
    else:
        excel_writer = pd.ExcelWriter(_stringify_path(excel_writer), engine=engine)
        need_save = True if excel_writer.engine != 'xlsxwriter' else False  # xlsxwriter can only save once!

    super().to_excel(excel_writer, sheet_name=sheet_name, na_rep=na_rep,
                     float_format=float_format, columns=columns, header=header,
                     index=index, index_label=index_label, startrow=startrow,
                     startcol=startcol, engine=engine, merge_cells=merge_cells,
                     encoding=encoding, inf_rep=inf_rep, verbose=verbose, **kwargs)

    if need_save:
        excel_writer.save()

    data_range, index_range, col_range, _ = get_xl_ranges(
        self.index, self.columns,
        sheet_name=sheet_name,
        columns=columns,
        header=header,
        index=index,
        index_label=index_label,
        startrow=startrow,
        startcol=startcol,
        merge_cells=merge_cells)

    f = self.copy()
    if isinstance(columns, list) or isinstance(columns, tuple):
        f = f[columns]

    return XLMap(data_range, index_range, col_range, f, writer=excel_writer)

def make_interpolate_curve(self):
    """ interpolate contour curves """
    lcx = np.zeros(0)
    lcy = np.zeros(0)
    lcx = np.append(lcx, np.array(self.lower_concave_in_x)[::-1])
    lcx = np.append(lcx, self.lower_arc_x)
    lcx = np.append(lcx, np.array(self.lower_concave_out_x))
    lcy = np.append(lcy, np.array(self.lower_concave_in_y)[::-1])
    lcy = np.append(lcy, self.lower_arc_y)
    lcy = np.append(lcy, np.array(self.lower_concave_out_y))
    self.lower_curve_x = lcx
    self.lower_curve_y = lcy
    self.lower_curve_x_shift = lcx
    self.lower_curve_y_shift = lcy + self.shift

    ucx = np.zeros(0)
    ucy = np.zeros(0)
    ucx = np.append(ucx, np.array(self.edge_straight_in_x))
    ucx = np.append(ucx, np.array(self.upper_straight_in_x))
    ucx = np.append(ucx, np.array(self.upper_convex_in_x)[::-1])
    ucx = np.append(ucx, self.upper_arc_x)
    ucx = np.append(ucx, np.array(self.upper_convex_out_x))
    ucx = np.append(ucx, np.array(self.upper_straight_out_x))
    ucx = np.append(ucx, np.array(self.edge_straight_out_x))
    ucy = np.append(ucy, np.array(self.edge_straight_in_y))
    ucy = np.append(ucy, np.array(self.upper_straight_in_y))
    ucy = np.append(ucy, np.array(self.upper_convex_in_y)[::-1])
    ucy = np.append(ucy, self.upper_arc_y)
    ucy = np.append(ucy, np.array(self.upper_convex_out_y))
    ucy = np.append(ucy, np.array(self.upper_straight_out_y))
    ucy = np.append(ucy, np.array(self.edge_straight_out_y))
    self.upper_curve_x = ucx
    self.upper_curve_y = ucy
    print(len(ucx), len(ucy))

    x = np.linspace(ucx.min(), ucx.max(), self.num_output_points)
    lcy_func = interp1d(self.lower_curve_x, self.lower_curve_y)
    lcy_shift_func = interp1d(self.lower_curve_x_shift, self.lower_curve_y_shift)
    ucy_func = interp1d(self.upper_curve_x, self.upper_curve_y)
    self.lower_curve_x_interp = x
    self.lower_curve_y_interp = lcy_func(x)
    self.lower_curve_x_shift_interp = x
    self.lower_curve_y_shift_interp = lcy_shift_func(x)
    self.upper_curve_x_interp = x
    self.upper_curve_y_interp = ucy_func(x)

    # make pandas DataFrame to save contour
    tmp = [x, self.lower_curve_y_shift_interp, self.upper_curve_y_interp,
           self.lower_curve_y_interp]
    self.dfc = pd.DataFrame(tmp, index=["x", "lower curve1", "upper curve", "lower curve2"])

    if self.is_save_excel:
        # save contour in Excel file
        writer = pd.ExcelWriter("result/turbine_contour_" + self.name + ".xlsx")
        self.dfc.T.to_excel(writer, "contour")
        writer.save()

def render(self, format, kind=None, buffer=None):
    # Variable aliases
    bucket = self.bucket
    df = self.dataframe

    # Compute group name for HDF5 and NetCDF formats
    # TODO: Optionally prefix with "realm" from "bucket.tdata"
    group_name = bucket.title.short

    if format == 'xlsx':
        # http://pandas.pydata.org/pandas-docs/stable/io.html#io-excel-writer
        # https://stackoverflow.com/questions/28058563/write-to-stringio-object-using-pandas-excelwriter
        with pandas.ExcelWriter('temp.xlsx', engine='xlsxwriter') as excel_writer:
            excel_writer.book.filename = buffer
            df.to_excel(excel_writer, sheet_name=bucket.title.compact[:31], index=False)

    elif format in ['hdf', 'hdf5', 'h5']:
        # Create index from "time" column
        df = dataframe_index_and_sort(df, 'time')
        # http://pandas.pydata.org/pandas-docs/stable/io.html#hdf5-pytables
        t = tempfile.NamedTemporaryFile(suffix='.hdf5')
        try:
            df.to_hdf(t.name, group_name, format='table', data_columns=True, index=False)
            buffer.write(t.read())
        except Exception as ex:
            return self.request.error_response(bucket, with_traceback=True)

    elif format in ['nc', 'cdf']:
        # Create index from "time" column
        df = dataframe_index_and_sort(df, 'time')
        # http://xarray.pydata.org/
        # http://xarray.pydata.org/en/stable/io.html#netcdf
        t = tempfile.NamedTemporaryFile(suffix='.nc')
        try:
            #df.to_xarray().to_netcdf(path=t.name, group=group_name)
            #df.to_xarray().to_netcdf(path=t.name, format='NETCDF4', engine='h5netcdf', group=group_name)
            df.to_xarray().to_netcdf(path=t.name, format='NETCDF4', engine='netcdf4', group=group_name)
            buffer.write(t.read())
        except Exception as ex:
            return self.request.error_response(bucket, with_traceback=True)

    elif format in ['dt', 'datatables']:
        # https://datatables.net/
        # Compute data_uri, forward "from" and "to" parameters
        data_uri = get_data_uri(bucket, 'data.html')
        # Render HTML snippet containing DataTable widget
        page = DatatablesPage(data_uri=data_uri, bucket=bucket)
        bucket.request.setHeader('Content-Type', 'text/html; charset=utf-8')
        return renderElement(bucket.request, page)

def process(self):
    fields = ["Chrom", "Position", "Covmp", "Ref", "Var", "Cons", "Fasta",
              "Qdepth", "Reads1", "Reads2", "Freq", "P-value", "StrandFilter",
              "R1+", "R1-", "R2+", "R2-"]
    self.output_fasta = []
    shift = int(self.shift.pop(0))
    input_orig = self.input_orig.pop(0)
    input_shifted = self.input_shifted.pop(0)
    all_orig = pd.read_csv(input_orig, sep='\t', header=[0, 1], na_values='-').fillna(-1)
    all_shifted = pd.read_csv(input_shifted, sep='\t', header=[0, 1], na_values='-').fillna(-1)
    contents = {}
    for sample_id in all_orig.columns.levels[0]:
        fasta = ''
        df_orig = all_orig[sample_id]
        df_shifted = all_shifted[sample_id]
        contents[sample_id] = pd.DataFrame(columns=df_orig.columns)
        nrows = df_orig.index.size
        for idx in df_orig.index:
            shifted_idx = (idx + shift) % nrows
            # Check the two are aligned. Take into account missing positions in one of them
            if df_orig.loc[idx, 'Ref'] != df_shifted.loc[shifted_idx, 'Ref']:
                if df_orig.loc[idx, 'Ref'] != -1 and df_shifted.loc[shifted_idx, 'Ref'] != -1:
                    print 'index =', idx, df_orig.loc[idx:idx+3, 'Ref'], df_shifted.loc[shifted_idx:shifted_idx+3, 'Ref']
                    raise Exception("Shifted and non-shifted summaries are not aligned")
            if df_orig.loc[idx, 'Qdepth'] > df_shifted.loc[shifted_idx, 'Qdepth']:
                contents[sample_id].loc[idx] = df_orig.loc[idx]
            else:
                contents[sample_id].loc[idx] = df_shifted.loc[shifted_idx]
                contents[sample_id].loc[idx, 'Position'] = df_orig.loc[idx, 'Position']
            if contents[sample_id].loc[idx, 'Fasta'] > -1:
                fasta += contents[sample_id].loc[idx, 'Fasta']
            else:
                contents[sample_id].loc[idx, 'Fasta'] = ''
        output_fasta = sample_id + '.fasta'
        self.output_fasta.append(output_fasta)
        with open(output_fasta, 'w') as fh:
            fh.write('>' + sample_id + '\n')
            fh.write(textwrap.fill(fasta, width=60))
    #THIS DOES NOT ALWAYS WORK: EXCEL FAILS TO READ OUTPUT FILE
    #with pd.ExcelWriter(self.output_summary, engine='openpyxl') as writer:
    #    df.to_excel(writer, sheet_name=sample_id, index=False, columns=fields)
    #    writer.save()
    all_samples = pd.concat(contents.values(), keys=contents.keys(), axis=1)
    all_samples.dropna(axis=0, how='all', inplace=True)
    ordered = all_samples.reindex(columns=fields, level=1)
    ordered.to_csv(self.output_summary, sep='\t', index=False)

def compute_suggested_adv_keyterms_dataset(relative_dataset_filename, min_members_per_cluster=5):
    filepath = "dataset/keyterm_clustering/" + relative_dataset_filename + ".json"
    top_adv_clusters_filepath = "dataset/keyterm_clustering/top_adv_keyterm_clusters.dump"

    ## load dataset and embedding model
    print "Loading Embedding model ..."
    embedding_model = load_embedding_model(True)
    vocabulary = embedding_model.vocab

    df = None
    top_adv_clusters = None
    print "Loading datasets ..."
    with open(top_adv_clusters_filepath) as fp:
        top_adv_clusters = np.load(fp)
    with open(filepath) as fp:
        df = pd.read_json(fp)

    ## compute
    result_dataset = []
    print "Starting computation ..."
    for index, row in df.iterrows():
        url = row['url']
        print "Processing clusters for URL: " + url + " ..."
        clusters = row['clusters']
        for cl_data in clusters:
            if cl_data['len'] >= min_members_per_cluster:
                suggested_keyterms = suggest_top_adv_keyterms(cl_data, top_adv_clusters, embedding_model)
                entry = {
                    'url': url,
                    'cl_idx': cl_data['idx'],
                    'cl_center': cl_data['center'],
                    'cl_len': cl_data['len'],
                    'suggested_keyterms': suggested_keyterms
                }
                result_dataset.append(entry)

    df_matching = pd.DataFrame.from_records(result_dataset)
    writer = pd.ExcelWriter("dataset/keyterm_clustering/" + relative_dataset_filename + "_suggested_adv" + ".xlsx")
    df_matching.to_excel(writer, "adv_matching")
    writer.save()
    return df_matching

def writeDiffsToXLSX(outFile, referenceFile, otherFiles, skiprows=1,
                     interpolate=False, years=None, startYear=0):
    """
    Compute the differences between the data in a reference .CSV file and
    one or more other .CSV files as (other - reference), optionally
    interpolating annual values between timesteps, storing the results in a
    single .XLSX file with each difference matrix on a separate worksheet,
    and with an index worksheet with links to the other worksheets. See also
    :py:func:`writeDiffsToCSV` and :py:func:`writeDiffsToFile`.

    :param outFile: (str) the name of the .XLSX file to create
    :param referenceFile: (str) the name of a .CSV file containing reference results
    :param otherFiles: (list of str) the names of other .CSV files for which
        to compute differences.
    :param skiprows: (int) should be 1 for GCAM files, to skip header info
        before column names
    :param interpolate: (bool) if True, linearly interpolate annual values
        between timesteps in all data files and compute the differences for
        all resulting years.
    :param years: (iterable of 2 values coercible to int) the range of years
        to include in results.
    :param startYear: (int) the year at which to begin interpolation, if
        interpolate is True. Defaults to the first year in `years`.
    :return: none
    """
    import pandas as pd

    with pd.ExcelWriter(outFile, engine='xlsxwriter') as writer:
        sheetNum = 1
        _logger.debug("Reading reference file: %s", referenceFile)
        refDF = readCsv(referenceFile, skiprows=skiprows, interpolate=interpolate,
                        years=years, startYear=startYear)
        for otherFile in otherFiles:
            otherFile = ensureCSV(otherFile)  # add csv extension if needed
            _logger.debug("Reading other file: %s", otherFile)
            otherDF = readCsv(otherFile, skiprows=skiprows, interpolate=interpolate,
                              years=years, startYear=startYear)
            sheetName = 'Diff%d' % sheetNum
            sheetNum += 1
            diff = computeDifference(refDF, otherDF)
            diff.to_excel(writer, index=None, sheet_name=sheetName, startrow=2, startcol=0)
            worksheet = writer.sheets[sheetName]
            label = "[%s] minus [%s]" % (otherFile, referenceFile)
            worksheet.write_string(0, 0, label)
            startRow = diff.shape[0] + 4
            worksheet.write_string(startRow, 0, otherFile)
            startRow += 2
            otherDF.reset_index(inplace=True)
            otherDF.to_excel(writer, index=None, sheet_name=sheetName,
                             startrow=startRow, startcol=0)
        dropExtraCols(refDF, inplace=True)
        _logger.debug("writing DF to excel file %s", outFile)
        refDF.to_excel(writer, index=None, sheet_name='Reference', startrow=0, startcol=0)

def generate_report(result_dict, target_report_csv_path):
    from six import StringIO

    output_path = os.path.join(target_report_csv_path, result_dict["summary"]["strategy_name"])
    try:
        os.mkdir(output_path)
    except:
        pass

    xlsx_writer = pd.ExcelWriter(os.path.join(output_path, "report.xlsx"), engine='xlsxwriter')

    # summary.csv
    csv_txt = StringIO()
    summary = result_dict["summary"]
    csv_txt.write(u"\n".join(sorted("{},{}".format(key, value)
                                    for key, value in six.iteritems(summary))))
    df = pd.DataFrame(data=[{"val": val} for val in summary.values()],
                      index=summary.keys()).sort_index()
    df.to_excel(xlsx_writer, sheet_name="summary")
    with open(os.path.join(output_path, "summary.csv"), 'w') as csvfile:
        csvfile.write(csv_txt.getvalue())

    for name in ["portfolio", "stock_account", "future_account",
                 "stock_positions", "future_positions", "trades"]:
        try:
            df = result_dict[name]
        except KeyError:
            continue
        # replace all dates in the dataframe with strings
        if df.index.name == "date":
            df = df.reset_index()
            df["date"] = df["date"].apply(lambda x: x.strftime("%Y-%m-%d"))
            df = df.set_index("date")
        csv_txt = StringIO()
        csv_txt.write(df.to_csv(encoding='utf-8'))
        df.to_excel(xlsx_writer, sheet_name=name)
        with open(os.path.join(output_path, "{}.csv".format(name)), 'w') as csvfile:
            csvfile.write(csv_txt.getvalue())

    # report.xlsx <--- all tables above, one sheet each
    xlsx_writer.save()

def get_context_data(self, **kwargs):
    context = {}
    params_data = self.request.GET
    subject = Subject.objects.get(id=params_data['subject_id'])

    context['title'] = _('Interaction Data')
    context['subject_name'] = subject.name

    if params_data['topic'] == _("All"):
        context['topic_name'] = params_data['topic']
    else:
        context['topic_name'] = Topic.objects.get(id=int(params_data['topic'])).name
    context['init_date'] = params_data['init_date']
    context['end_date'] = params_data['end_date']
    context['subject'] = subject

    # use getlist so more than one tag and resource class_name can be retrieved
    resources = params_data.getlist('resource')
    tags = params_data.getlist('tag')

    self.from_mural = params_data['from_mural']
    self.from_messages = params_data['from_messages']

    context['data'], context['header'] = self.get_mural_data(subject,
                                                             params_data['topic'],
                                                             params_data['init_date'],
                                                             params_data['end_date'],
                                                             resources, tags)

    # save the csv for later download
    df = pd.DataFrame.from_dict(context['data'], orient='index')
    df.columns = context['header']

    # ensure no more than one report CSV exists for that user to download
    if ReportCSV.objects.filter(user=self.request.user).count() > 0:
        report = ReportCSV.objects.get(user=self.request.user)
        report.delete()

    report = ReportCSV(user=self.request.user, csv_data=df.to_csv())
    report.save()

    # for excel files
    if ReportXLS.objects.filter(user=self.request.user).count() > 0:
        report = ReportXLS.objects.get(user=self.request.user)
        report.delete()

    folder_path = join(settings.MEDIA_ROOT, 'files')
    # create the folder if it does not exist yet
    if not os.path.isdir(folder_path):
        os.makedirs(folder_path)

    path = join(settings.MEDIA_ROOT, 'files', 'report' + str(self.request.user.id) + '.xls')
    writer = pd.ExcelWriter(path)
    df.to_excel(writer, sheet_name='first_sheet')
    writer.save()

    report = ReportXLS(user=self.request.user)
    report.xls_data.name = path
    report.save()

    return context

def create_excel(transactions, name_file):
    """ Create an Excel file using transactions

    This function is responsible for creating an Excel file using all the
    transactions we have until today. It creates a table ordered by date of
    transaction and highlights the positive and negative transactions.

    Args:
        name_file (string): name of the excel file.
        transactions (list): list of transactions
    """
    df = pd.DataFrame({'Data': transactions.keys(), 'Montante': transactions.values()})
    df['Data'] = pd.to_datetime(df.Data, dayfirst=True)
    df = df.sort_values(['Data', 'Montante'], ascending=[True, False])

    # Create a Pandas Excel writer using XlsxWriter as the engine.
    writer = pd.ExcelWriter('{}.xlsx'.format(name_file), engine='xlsxwriter',
                            datetime_format='dd-mm-yyyy')
    df.to_excel(writer, sheet_name='Transactions', index=False)

    # Get the xlsxwriter objects from the dataframe writer object.
    workbook = writer.book
    worksheet = writer.sheets['Transactions']

    format_mont = workbook.add_format({'num_format': u'#,##0.00 \u20ac'})
    format_red = workbook.add_format({'bg_color': '#FFC7CE', 'font_color': '#9C0006'})
    format_green = workbook.add_format({'bg_color': '#C6EFCE', 'font_color': '#006100'})

    worksheet.set_column('B:B', 10, format_mont)
    worksheet.set_column('A:A', 13, None)

    worksheet.conditional_format('B2:B{}'.format(len(df.index) + 1),
                                 {'type': 'cell', 'criteria': '>', 'value': 0,
                                  'format': format_green})
    worksheet.conditional_format('B1:B{}'.format(len(df.index) + 1),
                                 {'type': 'cell', 'criteria': '<', 'value': 0,
                                  'format': format_red})
    writer.save()