我们从Python开源项目中,提取了以下50个代码示例,用于说明如何使用openpyxl.load_workbook()。
def excel2Dict(filename):
    """Read an .xlsx file into a list of dicts.

    The first row supplies the keys; every following row becomes one dict.
    All cell values are coerced to ``str`` (so an empty cell becomes the
    string ``'None'``), matching the original behavior.

    :param filename: path of the workbook to read.
    :return: list of dicts, one per data row; ``[]`` for an empty sheet.
    """
    wb = load_workbook(filename=filename, read_only=True)
    try:
        rows = iter(wb.active.rows)
        try:
            header = [str(cell.value) for cell in next(rows)]
        except StopIteration:
            return []  # empty sheet -> no header, no records
        return [dict(zip(header, (str(cell.value) for cell in row)))
                for row in rows]
    finally:
        # fix: read-only workbooks keep the file handle open until closed
        wb.close()
def writrProxyExcel(proxyInfo_list):
    """Rewrite the proxy-pool spreadsheet with the verified proxy list.

    Drops the first (stale) sheet, creates a fresh one, and writes one proxy
    per row: column A = ip, columns B/C = the other two record fields.
    """
    pata = '/Users/wangjiacan/Desktop/shawn/????/agentPool.xlsx'
    page_workbook = openpyxl.load_workbook(pata)
    # fix: remove_sheet()/get_sheet_by_name()/get_sheet_names() are deprecated
    # in openpyxl 2.x+; use Workbook.remove(), mapping access and .sheetnames.
    page_workbook.remove(page_workbook[page_workbook.sheetnames[0]])
    new_sheet = page_workbook.create_sheet()
    for i, info in enumerate(proxyInfo_list, start=1):
        new_sheet["A%d" % i].value = info['ip']
        new_sheet["B%d" % i].value = info['??']
        new_sheet["C%d" % i].value = info['????']
    page_workbook.save(pata)
def _import_phenolist_xlsx(filepath, has_header):
    """Parse a phenolist from a single-sheet .xlsx workbook.

    :param filepath: path to the workbook.
    :param has_header: truthy -> first row is the header; the special value
        ``'augment'`` additionally fills missing header cells with their
        column index instead of raising.
    :return: list of row dicts, or ``None`` when the file is not a valid
        xlsx (and does not carry an .xlsx extension).
    :raises PheWebError: bad header, or an .xlsx-named file that won't parse.
    """
    import openpyxl
    try:
        wb = openpyxl.load_workbook(filepath)
        assert len(wb.worksheets) == 1
        sheet = wb.worksheets[0]
        rows = [[cell.value for cell in row] for row in sheet.rows]
        num_cols = len(rows[0])
        if has_header:
            fieldnames, rows = rows[0], rows[1:]
            if any(fieldname is None or fieldname == '' for fieldname in fieldnames):
                if has_header == 'augment':
                    # fix: also substitute empty-string headers -- the guard
                    # above treats '' as missing, but the original replaced
                    # only None, leaving '' duplicates to trip the assert.
                    fieldnames = [i if fieldname is None or fieldname == '' else fieldname
                                  for i, fieldname in enumerate(fieldnames)]
                else:
                    raise PheWebError('bad xlsx header')
            assert len(set(fieldnames)) == len(fieldnames), fieldnames
        else:
            fieldnames = list(range(num_cols))
        return [{fieldnames[i]: row[i] for i in range(num_cols)} for row in rows]
    except openpyxl.utils.exceptions.InvalidFileException:
        if filepath.endswith('.xlsx'):
            raise PheWebError("The filepath {!r} ends with '.xlsx' but reading it as an excel file failed.".format(filepath))
        return None
def parse_xlsx(self, doc, sh=False):
    """Read one worksheet of workbook `doc` into a list of row lists.

    :param doc: filename of the workbook.
    :param sh: sheet name to read; falsy -> the sheet named 'sheet1'.
    :return: list of rows, each a list of cell values (None for empty cells),
        covering the sheet's declared dimensions.
    """
    workbook = load_workbook(filename = doc)
    if sh:
        sheet = workbook[sh]
    else:
        # NOTE(review): openpyxl's default sheet title is 'Sheet1' (capital S);
        # 'sheet1' raises KeyError unless the workbook really has that name -- confirm.
        sheet = workbook['sheet1']
    dimensions = sheet.dimensions
    # e.g. 'A1:C10' -> corners 'A1' and 'C10'
    d1, d2 = dimensions.split(':')
    cols = list(string.ascii_uppercase)
    cols += [''.join(x) for x in product(cols,cols)] # to include further columns, named as combinations of characters
    # Split each corner into its letter (column) and digit (row) parts.
    firstcol = ''.join([x for x in d1 if re.search(r'[A-Z]', x)])  # NOTE(review): computed but never used below
    lastcol = ''.join([x for x in d2 if re.search(r'[A-Z]', x)])
    firstrow = int(''.join([x for x in d1 if re.search(r'[0-9]', x)]))
    lastrow = int(''.join([x for x in d2 if re.search(r'[0-9]', x)]))
    # Truncate the generated column names at the sheet's last column.
    cols = cols[:cols.index(lastcol) + 1]
    lines = []
    for i in range(firstrow, lastrow+1):
        line = []
        for c in cols:
            line.append(sheet[c + str(i)].value)
        lines.append(line)
    return lines
def insert_part_fields_into_xlsx(part_fields_dict, filename, recurse, group_components):
    '''Insert the fields in the extracted part dictionary into an XLSX spreadsheet.'''
    logger.log(
        DEBUG_OVERVIEW,
        'Inserting extracted fields into XLSX file {}.'.format(filename))
    # Reuse the target workbook when it already exists; otherwise signal the
    # inserter to start from a blank one by passing None.
    try:
        workbook = pyxl.load_workbook(filename, data_only=True)
    except IOError:
        workbook = None
    workbook = insert_part_fields_into_wb(part_fields_dict, workbook)
    if group_components:
        workbook = group_wb(workbook)
    workbook.save(filename)
def Open(self):
    """Open the source file described by this reader's attributes.

    csv  -> sniffs the dialect from a 1 KiB sample, requires a header row,
            and builds a csv.DictReader on self.v_object.
    xlsx -> opens the workbook read-only via openpyxl on self.v_object.

    Sets self.v_open = True on success.
    Raises Spartacus.Utils.Exception for a missing file, a CSV without a
    header, an unsupported extension, or any wrapped lower-level error.
    """
    try:
        if not os.path.isfile(self.v_filename):
            raise Spartacus.Utils.Exception('File {0} does not exist or is not a file.'.format(self.v_filename))
        if self.v_extension == 'csv':
            self.v_file = open(self.v_filename, encoding=self.v_encoding)
            # Sample the first 1 KiB for sniffing, then rewind for the reader.
            v_sample = self.v_file.read(1024)
            self.v_file.seek(0)
            v_sniffer = csv.Sniffer()
            if not v_sniffer.has_header(v_sample):
                raise Spartacus.Utils.Exception('CSV file {0} does not have a header.'.format(self.v_filename))
            v_dialect = v_sniffer.sniff(v_sample)
            self.v_object = csv.DictReader(self.v_file, self.v_header, None, None, v_dialect)
            self.v_open = True
        elif self.v_extension == 'xlsx':
            self.v_object = openpyxl.load_workbook(self.v_filename, read_only=True)
            self.v_open = True
        else:
            raise Spartacus.Utils.Exception('File extension "{0}" not supported.'.format(self.v_extension))
    except Spartacus.Utils.Exception as exc:
        # Project exceptions pass through untouched.
        raise exc
    except Exception as exc:
        # Anything unexpected is wrapped in the project's exception type.
        raise Spartacus.Utils.Exception(str(exc))
def test02_transform():
    """Functional test: `transform` reproduces the expected data_dict outputs."""
    base_output = "output_fctl_data_dict"
    extensions = ['html', 'xlsx', 'tsv', 'md']
    outputs = {ext: "{}.{}".format(base_output, ext) for ext in extensions}
    expected = os.path.join(TEST_DIR, 'resources', 'expected', 'data_dict')
    argv = ['transform', SCHEMA_FILE, '--output', base_output,
            '--columns', 'Field_compact_name', 'Field_name', 'Full_name',
            'Description', 'Count', 'Percentage', 'Types_count',
            '--formats'] + extensions
    main(argv)
    # tsv/md: exact file comparison.
    assert filecmp.cmp(outputs['tsv'], "{}.tsv".format(expected))
    assert filecmp.cmp(outputs['md'], "{}.md".format(expected))
    # html: whitespace-insensitive comparison.
    with open(outputs['html']) as out_fd, \
            open("{}.html".format(expected)) as exp_fd:
        assert out_fd.read().replace(' ', '') == exp_fd.read().replace(' ', '')
    # xlsx: compare every cell value of the active sheets.
    actual_cells = [cell.value
                    for row in load_workbook(outputs['xlsx']).active
                    for cell in row]
    expected_cells = [cell.value
                      for row in load_workbook("{}.xlsx".format(expected)).active
                      for cell in row]
    assert actual_cells == expected_cells
    for output in outputs.values():
        os.remove(output)
def test04_transform_default_cols():
    """Functional test: `transform` with default columns matches expectations."""
    base_output = "output_fctl_data_dict_default"
    extensions = ['html', 'xlsx', 'tsv', 'md']
    outputs = {ext: "{}.{}".format(base_output, ext) for ext in extensions}
    expected = os.path.join(TEST_DIR, 'resources', 'expected', 'data_dict_default')
    argv = ['transform', SCHEMA_FILE, '--output', base_output,
            '--formats'] + extensions
    main(argv)
    # tsv/md: exact file comparison.
    assert filecmp.cmp(outputs['tsv'], "{}.tsv".format(expected))
    assert filecmp.cmp(outputs['md'], "{}.md".format(expected))
    # html: whitespace-insensitive comparison.
    with open(outputs['html']) as out_fd, \
            open("{}.html".format(expected)) as exp_fd:
        assert out_fd.read().replace(' ', '') == exp_fd.read().replace(' ', '')
    # xlsx: compare every cell value of the active sheets.
    actual_cells = [cell.value
                    for row in load_workbook(outputs['xlsx']).active
                    for cell in row]
    expected_cells = [cell.value
                      for row in load_workbook("{}.xlsx".format(expected)).active
                      for cell in row]
    assert actual_cells == expected_cells
    for output in outputs.values():
        os.remove(output)
def test06_compare():
    """Functional test: `compare` against a second schema matches expectations."""
    base_output = "output_fctl_diff"
    extensions = ['html', 'xlsx', 'tsv', 'md']
    outputs = {ext: "{}.{}".format(base_output, ext) for ext in extensions}
    expected = os.path.join(TEST_DIR, 'resources', 'functional', 'expected', 'diff')
    other_schema = os.path.join(TEST_DIR, 'resources', 'input', 'test_schema2.json')
    argv = ['compare', SCHEMA_FILE, other_schema, '--output', base_output,
            '--formats'] + extensions
    main(argv)
    # tsv/md: exact file comparison.
    assert filecmp.cmp(outputs['tsv'], "{}.tsv".format(expected))
    assert filecmp.cmp(outputs['md'], "{}.md".format(expected))
    # html: whitespace-insensitive comparison.
    with open(outputs['html']) as out_fd, \
            open("{}.html".format(expected)) as exp_fd:
        assert out_fd.read().replace(' ', '') == exp_fd.read().replace(' ', '')
    # xlsx: compare every cell value of the active sheets.
    actual_cells = [cell.value
                    for row in load_workbook(outputs['xlsx']).active
                    for cell in row]
    expected_cells = [cell.value
                      for row in load_workbook("{}.xlsx".format(expected)).active
                      for cell in row]
    assert actual_cells == expected_cells
    for output in outputs.values():
        os.remove(output)
def test07_compare_detailed():
    """Functional test: `compare --detailed_diff` matches expectations."""
    base_output = "output_fctl_detailed_diff"
    extensions = ['html', 'xlsx', 'tsv', 'md']
    outputs = {ext: "{}.{}".format(base_output, ext) for ext in extensions}
    expected = os.path.join(TEST_DIR, 'resources', 'functional', 'expected', 'detailed_diff')
    other_schema = os.path.join(TEST_DIR, 'resources', 'input', 'test_schema2.json')
    argv = ['compare', SCHEMA_FILE, other_schema, '--output', base_output,
            '--detailed_diff', '--formats'] + extensions
    main(argv)
    # tsv/md: exact file comparison.
    assert filecmp.cmp(outputs['tsv'], "{}.tsv".format(expected))
    assert filecmp.cmp(outputs['md'], "{}.md".format(expected))
    # html: whitespace-insensitive comparison.
    with open(outputs['html']) as out_fd, \
            open("{}.html".format(expected)) as exp_fd:
        assert out_fd.read().replace(' ', '') == exp_fd.read().replace(' ', '')
    # xlsx: compare every cell value of the active sheets.
    actual_cells = [cell.value
                    for row in load_workbook(outputs['xlsx']).active
                    for cell in row]
    expected_cells = [cell.value
                      for row in load_workbook("{}.xlsx".format(expected)).active
                      for cell in row]
    assert actual_cells == expected_cells
    for output in outputs.values():
        os.remove(output)
def load_excel(excel_file):
    '''Load a workbook's active sheet.

    Returns (title, items): `title` is the list of header-row values and
    `items` an OrderedDict mapping each row's first-column value to the
    full list of that row's values.
    '''
    workbook = load_workbook(excel_file, read_only=True)
    sheet = workbook.active
    title = []
    items = collections.OrderedDict()
    for row in sheet.rows:
        values = [c.value for c in row]
        if not title:
            # First row seen: this is the header.
            title.extend(values)
        else:
            items[values[0]] = values
    workbook.close()
    return title, items
def excel_to_db(from_excel, to_db):
    '''Transfer Excel file to leveldb, return total count.

    Column 1 holds keys, column 2 holds values; header row (row 1) is
    skipped.  String cells are utf-8 encoded with all whitespace removed.
    `to_db` may be a path (a LevelDB is opened/created) or an open DB.
    '''
    _wb = load_workbook(from_excel, read_only=True)
    _ws = _wb.active
    _db = leveldb.LevelDB(to_db, create_if_missing=True) if isinstance(to_db, str) else to_db
    total = 0
    # fix: the key/value pair spans columns 1 AND 2, so max_col must be 2 --
    # the original max_col=1 produced one-cell rows, making _row[1] an
    # IndexError on the first data row.
    for _row in _ws.iter_rows(min_row=2, min_col=1, max_col=2):
        if _row and _row[0] and _row[1]:
            _key, _value = '', ''
            if _row[0].data_type == cell.Cell.TYPE_STRING:
                _key = _row[0].value.encode('utf-8')
                _key = ''.join(_key.split())
            if _row[1].data_type == cell.Cell.TYPE_STRING:
                # fix: the value must come from column 2 (_row[1]); the
                # original re-read the key cell (_row[0]).
                _value = _row[1].value.encode('utf-8')
                _value = ''.join(_value.split())
            _db.Put(_key, _value)
            total += 1
    _wb.close()
    return total
def test_xls_export_works_with_unicode(self):
    """Export with unicode column names and verify the boolean fav_colors cells."""
    survey = create_survey_from_xls(_logger_fixture_path(
        'childrens_survey_unicode.xls'))
    export_builder = ExportBuilder()
    export_builder.set_survey(survey)
    temp_xls_file = NamedTemporaryFile(suffix='.xlsx')
    export_builder.to_xls_export(temp_xls_file.name, self.data_utf8)
    temp_xls_file.seek(0)
    # check that values for red\u2019s and blue\u2019s are set to true
    wb = load_workbook(temp_xls_file.name)
    # fix: Workbook.get_sheet_by_name() is deprecated; use subscript access.
    children_sheet = wb["children.info"]
    data = dict([(r[0].value, r[1].value) for r in children_sheet.columns])
    self.assertTrue(data[u'children.info/fav_colors/red\u2019s'])
    self.assertTrue(data[u'children.info/fav_colors/blue\u2019s'])
    self.assertFalse(data[u'children.info/fav_colors/pink\u2019s'])
    temp_xls_file.close()
def test_to_xls_export_respects_custom_field_delimiter(self):
    """Export with the dot group delimiter and verify the header columns."""
    survey = self._create_childrens_survey()
    export_builder = ExportBuilder()
    export_builder.GROUP_DELIMITER = ExportBuilder.GROUP_DELIMITER_DOT
    export_builder.set_survey(survey)
    xls_file = NamedTemporaryFile(suffix='.xls')
    filename = xls_file.name
    export_builder.to_xls_export(filename, self.data)
    xls_file.seek(0)
    wb = load_workbook(filename)
    # check header columns
    # fix: Workbook.get_sheet_by_name() is deprecated; use subscript access.
    main_sheet = wb['childrens_survey']
    expected_column_headers = [
        u'name', u'age', u'geo.geolocation', u'geo._geolocation_latitude',
        u'geo._geolocation_longitude', u'geo._geolocation_altitude',
        u'geo._geolocation_precision', u'tel.tel.office', u'tel.tel.mobile',
        u'_id', u'meta.instanceID', u'_uuid', u'_submission_time',
        u'_index', u'_parent_index', u'_parent_table_name', u'_tags',
        '_notes']
    column_headers = [c[0].value for c in main_sheet.columns]
    self.assertEqual(sorted(column_headers),
                     sorted(expected_column_headers))
    xls_file.close()
def test_to_xls_export_generates_valid_sheet_names(self):
    """Export a survey with an over-long name and verify truncated sheet names."""
    survey = create_survey_from_xls(_logger_fixture_path(
        'childrens_survey_with_a_very_long_name.xls'))
    export_builder = ExportBuilder()
    export_builder.set_survey(survey)
    xls_file = NamedTemporaryFile(suffix='.xls')
    filename = xls_file.name
    export_builder.to_xls_export(filename, self.data)
    xls_file.seek(0)
    wb = load_workbook(filename)
    # check that we have childrens_survey, children, children_cartoons
    # and children_cartoons_characters sheets
    expected_sheet_names = ['childrens_survey_with_a_very_lo',
                            'childrens_survey_with_a_very_l1',
                            'childrens_survey_with_a_very_l2',
                            'childrens_survey_with_a_very_l3']
    # fix: Workbook.get_sheet_names() is deprecated; use the sheetnames property.
    self.assertEqual(wb.sheetnames, expected_sheet_names)
    xls_file.close()
def test_child_record_parent_table_is_updated_when_sheet_is_renamed(self):
    """Verify child sheets reference the renamed (truncated) parent sheet names."""
    survey = create_survey_from_xls(_logger_fixture_path(
        'childrens_survey_with_a_very_long_name.xls'))
    export_builder = ExportBuilder()
    export_builder.set_survey(survey)
    xls_file = NamedTemporaryFile(suffix='.xlsx')
    filename = xls_file.name
    export_builder.to_xls_export(filename, self.long_survey_data)
    xls_file.seek(0)
    wb = load_workbook(filename)
    # get the children's sheet
    # fix: get_sheet_by_name() and ws.cell('K2') are deprecated/removed in
    # openpyxl 2.x+; use subscript access for both sheet and cell.
    ws1 = wb['childrens_survey_with_a_very_l1']
    # parent_table is in cell K2
    parent_table_name = ws1['K2'].value
    expected_parent_table_name = 'childrens_survey_with_a_very_lo'
    self.assertEqual(parent_table_name, expected_parent_table_name)
    # get cartoons sheet
    ws2 = wb['childrens_survey_with_a_very_l2']
    parent_table_name = ws2['G2'].value
    expected_parent_table_name = 'childrens_survey_with_a_very_l1'
    self.assertEqual(parent_table_name, expected_parent_table_name)
    xls_file.close()
def build(self):
    """Populate self.codes/descriptions/units/values from the SINAPI spreadsheet.

    Uses the bundled sources/sinapi.xlsx when present; otherwise downloads it.
    (Python 2 code: uses print statements and the legacy openpyxl sheet API.)
    """
    import openpyxl
    tf = None
    if not tf:
        # Look for the spreadsheet shipped next to this module first.
        defaultlocation = os.path.dirname(os.path.abspath(__file__))+os.sep+"sources"+os.sep+"sinapi.xlsx"
        if os.path.exists(defaultlocation):
            print "building from ",defaultlocation
            tf = defaultlocation
        else:
            tf = self.download()
    if not tf:
        # No local file and download failed: nothing to parse.
        return
    f = openpyxl.load_workbook(tf)
    self.descriptions = []
    self.values = []
    self.units = []
    self.codes = []
    sh = f.get_sheet_by_name(f.get_sheet_names()[0])
    # NOTE(review): range(1, max_row) with 0-based sh.rows[i] visits rows
    # 2..max_row (1-based), i.e. skips the header row -- confirm intended.
    for i in range(1,sh.max_row):
        r = sh.rows[i]
        # Column layout: A=code, B=description, C=unit, D=value.
        self.codes.append(r[0].value)
        self.descriptions.append(r[1].value)
        self.units.append(r[2].value)
        self.values.append(r[3].value)
    print "parsed data: ",len(self.codes),"/",len(self.descriptions),"/",len(self.values),"/",len(self.units)
def load_excel():
    """Read leads from the 'data' sheet into a list of dicts keyed by
    Id / Full Name / Email / Phone.

    Rows whose Id cell (column A) is empty are skipped entirely; other
    empty cells become ''.
    """
    wb = openpyxl.load_workbook(filename=excel_file)
    ws = wb['data']
    # Parse data from Excel file
    dict_list = []
    keys = {'A': 'Id', 'B': 'Full Name', 'C': 'Email', 'D': 'Phone'}
    for row in ws.iter_rows(min_row=2, max_col=4):
        d = {}
        for cell in row:
            # NOTE(review): cell.column returns a letter only on openpyxl < 2.6;
            # newer versions return an int -- confirm the pinned version.
            if cell.column == 'A' and cell.value is None:  # fix: E711, use `is None`
                break
            elif cell.column != 'A' and cell.value is None:
                d[keys[cell.column]] = ''
            else:
                d[keys[cell.column]] = str(cell.value)
        if len(d.keys()) != 0:
            dict_list.append(d)
    return dict_list


# Update lead in excel worksheet and working dictionary
# Removing from excel leaves a blank row in the excel
def remove_lead(customer_list, customer_id):
    """Blank out the lead with `customer_id` in the worksheet and remove it
    from `customer_list`; returns the updated list.

    The row is cleared (A-D set to None) rather than deleted, leaving a
    blank row in the sheet.
    """
    wb = openpyxl.load_workbook(filename=excel_file)
    ws = wb['data']
    for row in ws.iter_rows(min_row=2, max_col=4):
        for cell in row:
            # Only inspect non-empty Id cells (column A).
            # fix: E711 -- compare to None with `is`.
            if cell.value is None or cell.column == 'B' or cell.column == 'C' or cell.column == 'D':
                continue
            elif cell.column == 'A' and str(cell.value) == customer_id:
                cell_row = str(cell.row)
                # Clear the whole row (columns A-D) for this lead.
                cell.value = None
                ws['B' + cell_row].value = None
                ws['C' + cell_row].value = None
                ws['D' + cell_row].value = None
                break
    wb.save(filename=excel_file)
    # Remove selected lead from the in-memory list as well.
    for customer in customer_list:
        if customer['Id'] == customer_id:
            customer_list.pop(customer_list.index(customer))
    return customer_list
def write_data(file_name, data, period):
    '''Method used for writing data into .xls file'''
    workbook = openpyxl.load_workbook(filename=file_name)
    sheet = workbook['Vulnerabilities']
    number = 1
    for row_data in data:
        # Only rows whose date falls inside the reporting period are written.
        if is_in_between(period, row_data['date']):
            values = [
                number,
                row_data['val_name'],
                row_data['severity'],
                row_data['date'],
                row_data['description'],
                row_data['affected'],
                row_data['solution'],
                row_data['link'],
            ]
            # Data starts at row 11, column B (offsets of 10 and 1).
            for index, value in enumerate(values, start=1):
                sheet.cell(row=number + 10, column=index + 1).value = value
            number += 1
    workbook.save(filename=file_name)
def _process_xlsx(self, datafile):
    """Import rows from an uploaded .xlsx marketing file into the database.

    Records an UploadedFile entry for the upload, then inserts every
    non-blank row via self._add_xlsx_row_to_db.
    """
    # Silence openpyxl warnings emitted while opening the workbook.
    with warnings.catch_warnings():
        warnings.simplefilter('ignore')
        wb = load_workbook(datafile, read_only=True, data_only=True)
    ws = wb.active
    num_rows = ws.max_row
    num_cols = ws.max_column
    new_file = UploadedFile(
        filename=self.upload_file_form.cleaned_data['marketing_file'].name,
        uploaded_by=self.request.user,
        num_columns=num_cols
    )
    new_file.save()
    self.uploaded_file = new_file
    # openpyxl rows are 1-based; blank-row filtering is delegated to
    # _xlsx_row_is_not_blank (header handling not visible here -- confirm).
    for row_num in range(1, num_rows+1):
        if self._xlsx_row_is_not_blank(ws, row_num, num_cols):
            self._add_xlsx_row_to_db(ws, row_num, num_cols)
def __init__(self, file=None, template_styles=None, timestamp=None, templated_sheets=None):
    """Build a templated workbook.

    :param file: optional path of an existing workbook to load; when absent
        a fresh Workbook() is created.
    :param template_styles: style set to use; defaults to DefaultStyleSet().
    :param timestamp: stored as-is on the instance.
    :param templated_sheets: extra sheets to register after the
        class-declared ones.
    """
    super().__init__()
    self.workbook = load_workbook(filename=file) if file else Workbook()
    self.template_styles = template_styles or DefaultStyleSet()
    self.timestamp = timestamp
    self.templated_sheets = []
    # NOTE(review): self._items is presumably populated by the (unseen)
    # superclass from sheets declared on the class body -- confirm.
    for sheetname, templated_sheet in self._items.items():
        self.add_templated_sheet(templated_sheet, sheetname=sheetname, add_to_self=False)
    for templated_sheet in templated_sheets or []:
        self.add_templated_sheet(sheet=templated_sheet, sheetname=templated_sheet.sheetname, add_to_self=True)
    self._validate()
def test_write_workbook(datadir, tmpdir):
    """Round-trip book1.xlsx through load/save and compare archive members."""
    datadir.chdir()
    src = ZipFile("book1.xlsx")
    orig_files = set(src.namelist())
    src.close()
    from openpyxl import load_workbook
    wb = load_workbook("book1.xlsx")
    tmpdir.chdir()
    wb.save("book1.xlsx")
    src = ZipFile("book1.xlsx")
    out_files = set(src.namelist())
    src.close()
    # remove files from archive that the other can't have
    out_files.discard("xl/sharedStrings.xml")
    orig_files.discard("xl/calcChain.xml")
    # fix: the test computed both member sets but never compared them, so it
    # could never fail -- assert the round-trip preserves the archive members.
    assert orig_files == out_files
def render_xlsx(self, outfd, data):
    """Write the timeline to OUTPUT_FILE as .xlsx, optionally highlighting
    suspicious entries.

    Each input line is '|'-separated into the Time/Type/Item/Details/Reason
    columns.  (Python 2 / legacy-openpyxl code: xrange, ws.cell('A1'),
    get_sheet_by_name.)
    """
    wb = Workbook(optimized_write = True)
    ws = wb.create_sheet()
    ws.title = 'Timeline Output'
    header = ["Time", "Type", "Item", "Details", "Reason"]
    ws.append(header)
    total = 1
    for line in data:
        coldata = line.split("|")
        ws.append(coldata)
        total += 1
    wb.save(filename = self._config.OUTPUT_FILE)
    if self._config.HIGHLIGHT != None:
        # Re-open the saved file for styling: optimized_write workbooks are
        # write-only, so highlighting needs a second pass.
        wb = load_workbook(filename = self._config.OUTPUT_FILE)
        ws = wb.get_sheet_by_name(name = "Timeline Output")
        # Bold the header row.
        for col in xrange(1, len(header) + 1):
            ws.cell("{0}{1}".format(get_column_letter(col), 1)).style.font.bold = True
        # Color rows whose cell value matches a suspicious key and record the reason.
        for row in xrange(2, total + 1):
            for col in xrange(2, len(header)):
                if ws.cell("{0}{1}".format(get_column_letter(col), row)).value in self.suspicious.keys():
                    self.fill(ws, row, len(header) + 1, self.suspicious[ws.cell("{0}{1}".format(get_column_letter(col), row)).value]["color"])
                    ws.cell("{0}{1}".format(get_column_letter(col + 1), row)).value = self.suspicious[ws.cell("{0}{1}".format(get_column_letter(col), row)).value]["reason"]
        wb.save(filename = self._config.OUTPUT_FILE)
def writeExcel(poxy_list):
    """Append proxies to the proxy-pool spreadsheet, creating it (with a
    header row) when missing.

    Each record writes ip / type / check-time into columns A/B/C after the
    current last row.
    """
    pata = '/Users/wangjiacan/Desktop/shawn/????/agentPool.xlsx'
    title = ['ip', '??', '????',]
    if not os.path.exists(pata):
        workbook = openpyxl.Workbook()
        sheet = workbook.active
        sheet["A1"].value = title[0]
        sheet["B1"].value = title[1]
        sheet["C1"].value = title[2]
        workbook.save(pata)
    page_workbook = openpyxl.load_workbook(pata)
    # fix: get_sheet_by_name()/get_sheet_names() are deprecated in
    # openpyxl 2.x+; use mapping access and the sheetnames property.
    page_sheet = page_workbook[page_workbook.sheetnames[0]]
    row = page_sheet.max_row
    for i in range(len(poxy_list)):
        page_sheet["A%d" % (row + i + 1)].value = poxy_list[i]['ip']
        page_sheet["B%d" % (row + i + 1)].value = poxy_list[i]['??']
        page_sheet["C%d" % (row + i + 1)].value = poxy_list[i]['????']
    page_workbook.save(pata)
def readExcel(proxyInfo_list):
    """Append every row of the proxy-pool sheet to `proxyInfo_list` (mutated
    in place) as dicts with ip / type / check-time fields."""
    pata = '/Users/wangjiacan/Desktop/shawn/????/agentPool.xlsx'
    page_workbook = openpyxl.load_workbook(pata)
    # fix: get_sheet_by_name()/get_sheet_names() are deprecated in
    # openpyxl 2.x+; use mapping access and the sheetnames property.
    page_sheet = page_workbook[page_workbook.sheetnames[0]]
    for row in page_sheet.rows:
        proxyData = {'ip': '', '??': '', '????': '', }
        proxyData["ip"] = row[0].value
        proxyData["??"] = row[1].value
        proxyData["????"] = row[2].value
        proxyInfo_list.append(proxyData)
    # NOTE(review): saving after a pure read looks unnecessary -- kept to
    # preserve existing behavior; confirm before removing.
    page_workbook.save(pata)
def writrProxyExcel(proxyInfo_list):
    """Rewrite the proxy-pool spreadsheet with the verified proxy list.

    Drops the first (stale) sheet, creates a fresh one, and writes one proxy
    per row: column A = ip, columns B/C = the other two record fields.
    """
    pata = '/Users/wangjiacan/Desktop/shawn/????/agentPool.xlsx'
    page_workbook = openpyxl.load_workbook(pata)
    # fix: remove_sheet()/get_sheet_by_name()/get_sheet_names() are deprecated
    # in openpyxl 2.x+; use Workbook.remove(), mapping access and .sheetnames.
    page_workbook.remove(page_workbook[page_workbook.sheetnames[0]])
    new_sheet = page_workbook.create_sheet()
    for i, info in enumerate(proxyInfo_list, start=1):
        new_sheet["A%d" % i].value = info['ip']
        new_sheet["B%d" % i].value = info['??']
        new_sheet["C%d" % i].value = info['????']
    page_workbook.save(pata)
def readExcel(proxyInfo_list):
    """Append every row of the proxy-pool sheet to `proxyInfo_list` (mutated
    in place) as dicts with ip / type / check-time fields."""
    pata = '/Users/wangjiacan/Desktop/shawn/????/agentPool.xlsx'
    page_workbook = openpyxl.load_workbook(pata)
    # fix: get_sheet_by_name()/get_sheet_names() are deprecated in
    # openpyxl 2.x+; use mapping access and the sheetnames property.
    page_sheet = page_workbook[page_workbook.sheetnames[0]]
    for row in page_sheet.rows:
        proxyData = {'ip': '', '??': '', '????': '', }
        proxyData["ip"] = row[0].value
        proxyData["??"] = row[1].value
        proxyData["????"] = row[2].value
        proxyInfo_list.append(proxyData)
    # NOTE(review): saving after a pure read looks unnecessary -- kept to
    # preserve existing behavior; confirm before removing.
    page_workbook.save(pata)
def __init__(self, output=None, **kwargs):
    """Set up the XLS renderer: resolve the output filename and open or
    create the target workbook."""
    super(XLSRenderer, self).__init__(**kwargs)
    # Make a single delegate text renderer for reuse. Most of the time we
    # will just replicate the output from the TextRenderer inside the
    # spreadsheet cell.
    self.delegate_text_renderer = text.TextRenderer(session=self.session)
    self.output = output or self.session.GetParameter("output")
    # If no output filename was given, just make a name based on the time
    # stamp.
    if self.output is None:  # fix: E711 -- compare to None with `is`
        self.output = "%s.xls" % time.ctime()
    try:
        # Append to an existing workbook when possible.
        self.wb = openpyxl.load_workbook(self.output)
        self.current_ws = self.wb.create_sheet()
    except IOError:
        self.wb = openpyxl.Workbook()
        self.current_ws = self.wb.active
def excel_parser(file = 'test.xlsx', sheel_name = 'Sheet1'):
    """Print the sheet: a formatted header line from A1/B1/C1, then the
    first three cells of every following row."""
    workbook = load_workbook(file)
    table = workbook[sheel_name]
    print('%-22s %-22s %s' % (table['A1'].value, table['B1'].value, table['C1'].value))
    for row_index, row in enumerate(table.iter_rows()):
        if row_index == 0:
            # Header row already printed above.
            continue
        for col_index, cell in enumerate(row):
            # Only the first three columns are printed; the rest are ignored.
            if col_index == 0:
                print('??:%-20s' % cell.value, end='')
            elif col_index == 1:
                print('??:%-20s' % cell.value, end='')
            elif col_index == 2:
                print('??:%-20s' % cell.value, end='')
        print()
def read_excel(excel_path):
    """Build a list of Candidate objects from the active sheet, one per data
    row (columns 1-14), skipping the header row and the trailing null row."""
    candidate_list = list()
    wb = load_workbook(excel_path)
    ws = wb.active
    # fix: to skip ONLY the trailing null row we need rows 2..max_row-1
    # inclusive, i.e. range(2, ws.max_row); the original
    # range(2, ws.max_row - 1) also silently dropped the last real candidate.
    for i in range(2, ws.max_row):
        candidate = Candidate(uid=ws.cell(row=i, column=1).value,
                              nickname=ws.cell(row=i, column=2).value,
                              age=ws.cell(row=i, column=3).value,
                              height=ws.cell(row=i, column=4).value,
                              image=ws.cell(row=i, column=5).value,
                              marriage=ws.cell(row=i, column=6).value,
                              education=ws.cell(row=i, column=7).value,
                              work_location=ws.cell(row=i, column=8).value,
                              work_sublocation=ws.cell(row=i, column=9).value,
                              shortnote=ws.cell(row=i, column=10).value,
                              matchCondition=ws.cell(row=i, column=11).value,
                              randListTag=ws.cell(row=i, column=12).value,
                              province=ws.cell(row=i, column=13).value,
                              gender=ws.cell(row=i, column=14).value)
        candidate_list.append(candidate)
    return candidate_list
def test_to_xls_export_respects_custom_field_delimiter(self):
    """Export with the dot group delimiter and verify the header columns."""
    survey = self._create_childrens_survey()
    export_builder = ExportBuilder()
    export_builder.GROUP_DELIMITER = ExportBuilder.GROUP_DELIMITER_DOT
    export_builder.set_survey(survey)
    xls_file = NamedTemporaryFile(suffix='.xls')
    filename = xls_file.name
    export_builder.to_xls_export(filename, self.data)
    xls_file.seek(0)
    wb = load_workbook(filename)
    # check header columns
    # fix: Workbook.get_sheet_by_name() is deprecated; use subscript access.
    main_sheet = wb['childrens_survey']
    expected_column_headers = [
        u'name', u'age', u'geo.geolocation', u'geo._geolocation_latitude',
        u'geo._geolocation_longitude', u'geo._geolocation_altitude',
        u'geo._geolocation_precision', u'tel.tel.office', u'tel.tel.mobile',
        u'_id', u'meta.instanceID', u'_uuid', u'_submission_time',
        u'_index', u'_parent_index', u'_parent_table_name', u'_tags',
        '_notes', '_version']
    column_headers = [c[0].value for c in main_sheet.columns]
    self.assertEqual(sorted(column_headers),
                     sorted(expected_column_headers))
    xls_file.close()
def check_noequations(self):
    """ Verify that no column in the sheet contains an equation, based on
    checking every cell in rows 1-4 of every header column.

    :return: True when no equation is found.
    :raises AMIExcelError: naming the offending cell otherwise.
    """
    wb_open = load_workbook(self.path, read_only = True)
    # fix: get_sheet_by_name() is deprecated in openpyxl 2.x+.
    sheet = wb_open[self.name]
    # fix: range(1, len(...)) skipped the last header column; include it.
    for i in range(1, len(self.header_entries) + 1):
        for j in range(1,5):
            value = sheet.cell(row = j, column = i).value
            # equation check logic, TODO might be better code out there
            if (value and isinstance(value, str) and value[0] == "="):
                # fix: the message hard-coded row 4 although rows 1-4 are
                # checked -- report the actual row.
                raise AMIExcelError("Cell R{0}C{1} contain equations."
                                    .format(j, i))
    return True
def main():
    """CLI entry point: validate an AMI Excel file and, when invalid and an
    output path was given, re-save it through openpyxl and re-validate."""
    parser = _make_parser()
    args = parser.parse_args()
    _configure_logging(args)
    if not args.excel:
        return
    excel = ami_excel(args.excel)
    if excel and excel.validate_workbook():
        LOGGER.info("{}: valid".format(args.excel))
    elif args.output:
        # Round-trip through openpyxl (values only) and try again.
        wb = load_workbook(args.excel, data_only = True)
        wb.save(args.output)
        new_excel = ami_excel(args.output)
        if new_excel.validate_workbook():
            LOGGER.info("{}: valid".format(args.output))
        else:
            LOGGER.error("{}: invalid".format(args.output))
def parse_statistics(logfile):
    """Sort the 'Sheet' worksheet by 'Line Numbers' and embed a scatter chart.

    Overwrites `logfile` in place in two passes: pandas writes the sorted
    data back, then openpyxl re-opens the file and adds an upload-time
    scatter chart over the data columns.
    """
    xl = pd.ExcelFile(logfile)
    df = xl.parse("Sheet")
    df = df.sort_values(by='Line Numbers')
    writer = pd.ExcelWriter(logfile)
    df.to_excel(writer, sheet_name='Sheet', index=False)
    writer.save()
    wb = openpyxl.load_workbook(logfile)
    ws = wb.active
    row_count = ws.max_row
    column_count = ws.max_column
    chart = ScatterChart()
    chart.title = "Time upload domain names"
    chart.style = 13
    chart.x_axis.title = "Line numbers"
    chart.y_axis.title = "Time, sec"
    # Column 1 holds the x values (line numbers); every further column
    # becomes one data series.
    xvalues = Reference(ws, min_col=1, min_row=2, max_row=row_count)
    # assumes at most two data series (two colors) -- TODO confirm
    color_choice = ['3F888F', 'D24D57']
    for i in range(2, column_count + 1):
        values = Reference(ws, min_col=i, min_row=1, max_row=row_count)
        series = Series(values, xvalues, title_from_data=True)
        series.marker.symbol = "diamond"
        series.graphicalProperties.line.solidFill = color_choice[i-2]
        series.marker.graphicalProperties.line.solidFill = color_choice[i-2]
        series.marker.graphicalProperties.solidFill = color_choice[i-2]
        series.graphicalProperties.line.width = 20000
        chart.series.append(series)
    chart.legend.legendPos = 'b'  # legend below the plot
    ws.add_chart(chart)
    wb.save(logfile)
def __init__(self):
    """Open data.xlsx and bind its first two worksheets to ws1/ws2."""
    self._xlsx_wb = load_workbook(filename='data.xlsx')
    # fix: get_sheet_names()/get_sheet_by_name() are deprecated in
    # openpyxl 2.x+; use the sheetnames property and mapping access.
    # (Also removed a large block of commented-out workbook-creation code.)
    sheetnames = self._xlsx_wb.sheetnames
    self._xlsx_ws1 = self._xlsx_wb[sheetnames[0]]
    self._xlsx_ws2 = self._xlsx_wb[sheetnames[1]]
def on_importXLSX_clicked(self):
    """Qt slot: import (x, y) pairs from a user-chosen Excel workbook and
    feed each rounded pair to on_add_clicked."""
    fileName, _ = QFileDialog.getOpenFileName(self, 'Open file...', self.env,
                                              "Microsoft Office Excel(*.xlsx *.xlsm *.xltx *.xltm)")
    if fileName:
        wb = openpyxl.load_workbook(fileName)
        # fix: get_sheet_by_name()/get_sheet_names() are deprecated in
        # openpyxl 2.x+; use mapping access and the sheetnames property.
        ws = wb[wb.sheetnames[0]]
        data = list()
        i = 1
        while True:
            x = ws.cell(row=i, column=1).value
            y = ws.cell(row=i, column=2).value
            # fix: E711 -- compare to None with `is`.
            if x is None or y is None:
                break
            try:
                data.append((round(float(x), 4), round(float(y), 4)))
            except (TypeError, ValueError):
                # fix: narrow the bare `except:` to what float() can raise.
                dlgbox = QMessageBox(QMessageBox.Warning, "File error",
                                     "Wrong format.\nThe datasheet seems to including non-digital cell.",
                                     (QMessageBox.Ok), self)
                if dlgbox.exec_():
                    break
            i += 1
        for e in data:
            self.on_add_clicked(e[0], e[1])
def __init__(self, excel_file, sheet_name='SWITCHES', variable_name_row=1,
             key_column=1, template_column=2, variable_start_column=3):
    ''' Loads the excel configuration file '''
    try:
        # TODO: Check is file is locked, if so wait and try to open N
        # more times after N seconds.
        self._wb = load_workbook(excel_file, data_only=True)
        self._filename = excel_file
    except Exception as exc:
        # fix: the bare `except:` also swallowed SystemExit/KeyboardInterrupt
        # and discarded the original error; narrow it and chain the cause.
        raise Exception("Could not load spreadsheet '%s'" % excel_file) from exc
    self.set_excel_sheet(sheet_name)
    self.set_variable_name_row(variable_name_row)
    self.set_template_column(template_column)
    self.set_data_start_row(variable_name_row + 1)
    self.set_key_column(key_column)
    self.set_variable_start_column(variable_start_column)
def parse_data(data):
    """Yield (lower-cased player name, requested stat) for every row of
    Sheet1 in nbastats.xlsx; `data` names the stat column."""
    cols = ['name', "age", "gp", "w", "l", "min", "pts", "fgm", "fga", "fg%",
            "3pm", "3pa", "ftm", "fta", "ft%", "oreb", "dreb", "reb", "ast",
            "tov", "stl", "blk", "pf", "dd2", "td3"]
    stat_index = cols.index(data)
    workbook = load_workbook("nbastats.xlsx")
    sheet = workbook['Sheet1']
    for record in sheet.values:
        # Player name lives in the first column.
        yield record[0].lower(), record[stat_index]
def save_xls(self, dframe):
    """Write `dframe` as a sheet named after self.name, appending to the
    workbook <current_folder>/<self.name>.xlsx when it already exists."""
    xls_path = os.path.join(current_folder, self.name + '.xlsx')
    if os.path.exists(xls_path):
        # Append: register the existing sheets with the writer so they
        # survive the save.
        book = load_workbook(xls_path)
        writer = pd.ExcelWriter(xls_path, engine='openpyxl')
        # NOTE(review): assigning writer.book / writer.sheets was removed in
        # pandas >= 1.5 -- confirm the pinned pandas version.
        writer.book = book
        writer.sheets = dict((ws.title, ws) for ws in book.worksheets)
        dframe.to_excel(writer, self.name)
        writer.save()
    else:
        # New file.
        # fix: use pd.ExcelWriter here too, consistent with the branch above.
        writer = pd.ExcelWriter(xls_path)
        dframe.to_excel(writer, self.name)
        writer.save()
def __init__(self, ExcelPath):
    """Bind an Excel file path: open the workbook when the file exists,
    otherwise start a fresh in-memory one (self.stat records which)."""
    self.path = ExcelPath
    if os.path.isfile(ExcelPath):
        self.stat = True
        self.wb = openpyxl.load_workbook(ExcelPath)
    else:
        # File missing: remember that and work on a new workbook.
        self.stat = False
        self.wb = openpyxl.Workbook()
def import_openpyxl(self):
    """Import openpyxl lazily, keeping the module and load_workbook on the
    instance; prints an install hint when the module is missing."""
    try:
        import openpyxl
        from openpyxl import load_workbook
    except ImportError:
        print("openpyxl module is not installed"
              "Please install openpyxl module to"
              "perform any activities related to parsing xl sheets")
    else:
        self.openpyxl = openpyxl
        self.load = load_workbook
def load_workbook(self, wb_location):
    """ Load an existing xl workbook.

    :param wb_location: path of the workbook.
    :return: the Workbook, or None when loading fails.
    """
    value = None
    try:
        wb = self.load(wb_location)
    except Exception as err:
        # fix: `print err` is Python 2 statement syntax -- a SyntaxError on
        # Python 3; the function form behaves the same on both.
        print(err)
        print("Error loading workbook, check if file exists")
    else:
        value = wb
    return value
def extract_part_fields_from_xlsx(filename, inc_field_names=None, exc_field_names=None, recurse=False):
    '''Return a dictionary of part fields extracted from an XLSX spreadsheet.'''
    logger.log(
        DEBUG_OVERVIEW,
        'Extracting fields {}, -{} from XLSX file {}.'.format(
            inc_field_names, exc_field_names, filename))
    try:
        workbook = pyxl.load_workbook(filename, data_only=True)
        return extract_part_fields_from_wb(workbook, inc_field_names, exc_field_names)
    except FieldExtractionError:
        # Extraction failure is non-fatal: warn and return nothing.
        logger.warn('Field extraction failed on {}.'.format(filename))
        return {}
def mainWork(absent):
    """Mark attendance for today in <class>.xlsx on the subject's sheet.

    :param absent: space-separated roll numbers of absent students.
    Writes 'A'/'P' per student in the next free column, plus the attendance
    percentage three rows below the last student.
    """
    absent = list(map(int, absent.split(' ')))
    lec_times = ('7:50', '8:50', '10:00', '11:00', '13:00', '14:00', '15:10')
    strength = 60  # Get from database
    subject = 'sdl'  # Get subject code from database
    cl_division = 'TEA'  # Get class and division from database
    lec_number = 3  # Needs better way to get
    book = opx.load_workbook(cl_division + '.xlsx')
    # fix: get_sheet_by_name() is deprecated in openpyxl 2.x+.
    sheet = book[subject]
    # Find the first column whose header cell is empty.
    curr_col = 1
    # fix: E711 -- compare to None with `is not`.
    while sheet[get_column_letter(curr_col) + '1'].value is not None:
        curr_col += 1
    curr_col = get_column_letter(curr_col)
    # Date header for today's column.
    sheet[curr_col + '1'].value = str(time.localtime().tm_mday) + '-' + str(time.localtime().tm_mon) + '-' + str(time.localtime().tm_year)
    for i in range(1, strength + 1):
        if i in absent:
            sheet[curr_col + str(i + 1)].value = 'A'
        else:
            sheet[curr_col + str(i + 1)].value = 'P'
        print('Marked!', str(i))
    # Attendance percentage, e.g. '95.00%'.
    sheet[curr_col + str(strength + 3)].value = '%.2f' % (((strength - len(absent)) / strength) * 100)
    sheet[curr_col + str(strength + 3)].value += '%'
    book.save(cl_division + '.xlsx')
def setUp(self):
    """Generate a ComissionXLSX report from the fixture dataset and load the
    resulting workbook for the tests."""
    report_name = "../test-data-set/test.xlsx"
    self.report = rCMS.ComissionXLSX(report_name)
    fixture = DataSet()
    self.report.add_data(fixture.core_details, fixture.plugins, fixture.themes)
    self.report.generate_xlsx()
    self.workbook = load_workbook(report_name)
def test03_transform_filter():
    """Functional test: `transform --filter` reproduces the filtered outputs."""
    base_output = "output_fctl_data_dict_filtered"
    extensions = ['html', 'xlsx', 'tsv', 'md']
    outputs = {ext: "{}.{}".format(base_output, ext) for ext in extensions}
    namespace = os.path.join(TEST_DIR, 'resources', 'input', 'namespace.json')
    expected = os.path.join(TEST_DIR, 'resources', 'expected', 'data_dict_filtered')
    argv = ['transform', SCHEMA_FILE, '--output', base_output,
            '--filter', namespace,
            '--columns', 'Field_compact_name', 'Field_name', 'Full_name',
            'Description', 'Count', 'Percentage', 'Types_count',
            '--formats'] + extensions
    main(argv)
    # tsv/md: exact file comparison.
    assert filecmp.cmp(outputs['tsv'], "{}.tsv".format(expected))
    assert filecmp.cmp(outputs['md'], "{}.md".format(expected))
    # html: whitespace-insensitive comparison.
    with open(outputs['html']) as out_fd, \
            open("{}.html".format(expected)) as exp_fd:
        assert out_fd.read().replace(' ', '') == exp_fd.read().replace(' ', '')
    # xlsx: compare every cell value of the active sheets.
    actual_cells = [cell.value
                    for row in load_workbook(outputs['xlsx']).active
                    for cell in row]
    expected_cells = [cell.value
                      for row in load_workbook("{}.xlsx".format(expected)).active
                      for cell in row]
    assert actual_cells == expected_cells
    for output in outputs.values():
        os.remove(output)