Python openpyxl module: load_workbook() example source code
The 50 code examples below were extracted from open-source Python projects and illustrate how to use openpyxl.load_workbook().
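Most of the examples share the same basic pattern: load_workbook() opens an existing .xlsx file, worksheets are reached through the returned Workbook object, and any changes are written back with save(). The minimal sketch below illustrates that pattern only; the file name example.xlsx, the sheet, and the cell references are placeholders rather than values taken from the projects listed here.

from openpyxl import load_workbook

# Open an existing workbook ("example.xlsx" is a placeholder path).
# Pass read_only=True for large files that are only read, not modified.
wb = load_workbook("example.xlsx")
ws = wb.active                      # or wb["SheetName"] for a specific worksheet

# Iterate over the rows and print the raw cell values.
for row in ws.iter_rows(values_only=True):
    print(row)

# Modify a cell and write the workbook back to disk.
ws["A1"] = "updated"
wb.save("example.xlsx")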
def excel2Dict(filename):
    wb = load_workbook(filename=filename, read_only=True)
    ws = wb.active
    records = []
    count = 0
    keys = []
    for row in ws.rows:
        temp = []
        for cell in row:
            if count == 0:
                keys.append(str(cell.value))
            else:
                temp.append(str(cell.value))
        if count > 0:
            records.append(dict(zip(keys, temp)))
        else:
            count += 1
    return records
def writrProxyExcel(proxyInfo_list):
    """
    Write the proxy IPs back to the Excel file.
    """
    pata = '/Users/wangjiacan/Desktop/shawn/????/agentPool.xlsx'
    page_workbook = openpyxl.load_workbook(pata)
    # Remove the existing sheet.
    page_workbook.remove_sheet(page_workbook.get_sheet_by_name(page_workbook.get_sheet_names()[0]))
    # Create a new sheet.
    new_sheet = page_workbook.create_sheet()
    for i in range(len(proxyInfo_list)):
        new_sheet["A%d" % (i + 1)].value = proxyInfo_list[i]['ip']
        new_sheet["B%d" % (i + 1)].value = proxyInfo_list[i]['??']
        new_sheet["C%d" % (i + 1)].value = proxyInfo_list[i]['????']
    page_workbook.save(pata)
def _import_phenolist_xlsx(filepath, has_header):
    import openpyxl
    try:
        wb = openpyxl.load_workbook(filepath)
        assert len(wb.worksheets) == 1
        sheet = wb.worksheets[0]
        rows = [[cell.value for cell in row] for row in sheet.rows]
        num_cols = len(rows[0])
        if has_header:
            fieldnames, rows = rows[0], rows[1:]
            if any(fieldname is None or fieldname == '' for fieldname in fieldnames):
                if has_header == 'augment':
                    fieldnames = [i if fieldname is None else fieldname for i, fieldname in enumerate(fieldnames)]
                else:
                    raise PheWebError('bad xlsx header')
            assert len(set(fieldnames)) == len(fieldnames), fieldnames
        else:
            fieldnames = list(range(num_cols))
        return [{fieldnames[i]: row[i] for i in range(num_cols)} for row in rows]
    except openpyxl.utils.exceptions.InvalidFileException:
        if filepath.endswith('.xlsx'):
            raise PheWebError("The filepath {!r} ends with '.xlsx' but reading it as an excel file failed.".format(filepath))
        return None
def parse_xlsx(self, doc, sh=False):
    workbook = load_workbook(filename = doc)
    if sh:
        sheet = workbook[sh]
    else:
        sheet = workbook['sheet1']
    dimensions = sheet.dimensions
    d1, d2 = dimensions.split(':')
    cols = list(string.ascii_uppercase)
    cols += [''.join(x) for x in product(cols,cols)]  # to include further columns, named as combinations of characters
    firstcol = ''.join([x for x in d1 if re.search(r'[A-Z]', x)])
    lastcol = ''.join([x for x in d2 if re.search(r'[A-Z]', x)])
    firstrow = int(''.join([x for x in d1 if re.search(r'[0-9]', x)]))
    lastrow = int(''.join([x for x in d2 if re.search(r'[0-9]', x)]))
    cols = cols[:cols.index(lastcol) + 1]
    lines = []
    for i in range(firstrow, lastrow+1):
        line = []
        for c in cols:
            line.append(sheet[c + str(i)].value)
        lines.append(line)
    return lines
def insert_part_fields_into_xlsx(part_fields_dict, filename, recurse, group_components):
    '''Insert the fields in the extracted part dictionary into an XLSX spreadsheet.'''
    logger.log(
        DEBUG_OVERVIEW,
        'Inserting extracted fields into XLSX file {}.'.format(filename))
    # Either insert fields into an existing workbook, or use an empty one.
    try:
        wb = pyxl.load_workbook(filename, data_only=True)
    except IOError:
        wb = None
    wb = insert_part_fields_into_wb(part_fields_dict, wb)
    if group_components:
        wb = group_wb(wb)
    wb.save(filename)
def Open(self):
    try:
        if not os.path.isfile(self.v_filename):
            raise Spartacus.Utils.Exception('File {0} does not exist or is not a file.'.format(self.v_filename))
        if self.v_extension == 'csv':
            self.v_file = open(self.v_filename, encoding=self.v_encoding)
            v_sample = self.v_file.read(1024)
            self.v_file.seek(0)
            v_sniffer = csv.Sniffer()
            if not v_sniffer.has_header(v_sample):
                raise Spartacus.Utils.Exception('CSV file {0} does not have a header.'.format(self.v_filename))
            v_dialect = v_sniffer.sniff(v_sample)
            self.v_object = csv.DictReader(self.v_file, self.v_header, None, None, v_dialect)
            self.v_open = True
        elif self.v_extension == 'xlsx':
            self.v_object = openpyxl.load_workbook(self.v_filename, read_only=True)
            self.v_open = True
        else:
            raise Spartacus.Utils.Exception('File extension "{0}" not supported.'.format(self.v_extension))
    except Spartacus.Utils.Exception as exc:
        raise exc
    except Exception as exc:
        raise Spartacus.Utils.Exception(str(exc))
def test02_transform():
    base_output = "output_fctl_data_dict"
    outputs = {}
    extensions = ['html', 'xlsx', 'tsv', 'md']
    for ext in extensions:
        outputs[ext] = "{}.{}".format(base_output, ext)
    exp = os.path.join(TEST_DIR, 'resources', 'expected', 'data_dict')
    argv = ['transform', SCHEMA_FILE, '--output', base_output, '--columns',
            'Field_compact_name', 'Field_name', 'Full_name', 'Description', 'Count', 'Percentage',
            'Types_count',
            '--formats'] + extensions
    main(argv)
    assert filecmp.cmp(outputs['tsv'], "{}.tsv".format(exp))
    assert filecmp.cmp(outputs['md'], "{}.md".format(exp))
    with open(outputs['html']) as out_fd, \
            open("{}.html".format(exp)) as exp_fd:
        assert out_fd.read().replace(' ', '') == exp_fd.read().replace(' ', '')
    res = [cell.value for row in load_workbook(outputs['xlsx']).active for cell in row]
    exp = [cell.value for row in load_workbook("{}.xlsx".format(exp)).active for cell in row]
    assert res == exp
    for output in outputs.values():
        os.remove(output)
def test04_transform_default_cols():
    base_output = "output_fctl_data_dict_default"
    outputs = {}
    extensions = ['html', 'xlsx', 'tsv', 'md']
    for ext in extensions:
        outputs[ext] = "{}.{}".format(base_output, ext)
    exp = os.path.join(TEST_DIR, 'resources', 'expected', 'data_dict_default')
    argv = ['transform', SCHEMA_FILE, '--output', base_output, '--formats'] + extensions
    main(argv)
    assert filecmp.cmp(outputs['tsv'], "{}.tsv".format(exp))
    assert filecmp.cmp(outputs['md'], "{}.md".format(exp))
    with open(outputs['html']) as out_fd, \
            open("{}.html".format(exp)) as exp_fd:
        assert out_fd.read().replace(' ', '') == exp_fd.read().replace(' ', '')
    res = [cell.value for row in load_workbook(outputs['xlsx']).active for cell in row]
    exp = [cell.value for row in load_workbook("{}.xlsx".format(exp)).active for cell in row]
    assert res == exp
    for output in outputs.values():
        os.remove(output)
def test06_compare():
    base_output = "output_fctl_diff"
    outputs = {}
    extensions = ['html', 'xlsx', 'tsv', 'md']
    for ext in extensions:
        outputs[ext] = "{}.{}".format(base_output, ext)
    exp = os.path.join(TEST_DIR, 'resources', 'functional', 'expected', 'diff')
    exp_schema = os.path.join(TEST_DIR, 'resources', 'input', 'test_schema2.json')
    argv = ['compare', SCHEMA_FILE, exp_schema, '--output', base_output, '--formats'] + extensions
    main(argv)
    assert filecmp.cmp(outputs['tsv'], "{}.tsv".format(exp))
    assert filecmp.cmp(outputs['md'], "{}.md".format(exp))
    with open(outputs['html']) as out_fd, \
            open("{}.html".format(exp)) as exp_fd:
        assert out_fd.read().replace(' ', '') == exp_fd.read().replace(' ', '')
    res = [cell.value for row in load_workbook(outputs['xlsx']).active for cell in row]
    exp = [cell.value for row in load_workbook("{}.xlsx".format(exp)).active for cell in row]
    assert res == exp
    for output in outputs.values():
        os.remove(output)
def test07_compare_detailed():
    base_output = "output_fctl_detailed_diff"
    outputs = {}
    extensions = ['html', 'xlsx', 'tsv', 'md']
    for ext in extensions:
        outputs[ext] = "{}.{}".format(base_output, ext)
    exp = os.path.join(TEST_DIR, 'resources', 'functional', 'expected', 'detailed_diff')
    exp_schema = os.path.join(TEST_DIR, 'resources', 'input', 'test_schema2.json')
    argv = ['compare', SCHEMA_FILE, exp_schema, '--output', base_output, '--detailed_diff',
            '--formats'] + extensions
    main(argv)
    assert filecmp.cmp(outputs['tsv'], "{}.tsv".format(exp))
    assert filecmp.cmp(outputs['md'], "{}.md".format(exp))
    with open(outputs['html']) as out_fd, \
            open("{}.html".format(exp)) as exp_fd:
        assert out_fd.read().replace(' ', '') == exp_fd.read().replace(' ', '')
    res = [cell.value for row in load_workbook(outputs['xlsx']).active for cell in row]
    exp = [cell.value for row in load_workbook("{}.xlsx".format(exp)).active for cell in row]
    assert res == exp
    for output in outputs.values():
        os.remove(output)
def load_excel(excel_file):
    '''Load an Excel file; return the header row and an ordered dict of the remaining rows, keyed by each row's first column.'''
    _wb = load_workbook(excel_file, read_only=True)
    _ws = _wb.active
    _title = []
    _items = collections.OrderedDict()
    for _r in _ws.rows:
        if not _title:
            for _i in _r:
                _title.append(_i.value)
        else:
            _item = []
            for _i in _r:
                _item.append(_i.value)
            _items[_item[0]] = _item
    _wb.close()
    return _title, _items
def excel_to_db(from_excel, to_db):
    '''Transfer Excel file to leveldb, return total count.'''
    _wb = load_workbook(from_excel, read_only=True)
    _ws = _wb.active
    _db = leveldb.LevelDB(to_db, create_if_missing=True) if isinstance(to_db, str) else to_db
    total = 0
    # Read the first two columns of every data row (row 1 is the header).
    for _row in _ws.iter_rows(min_row=2, min_col=1, max_col=2):
        if _row and _row[0] and _row[1]:
            _key, _value = '', ''
            if _row[0].data_type == cell.Cell.TYPE_STRING:
                _key = _row[0].value.encode('utf-8')
                _key = ''.join(_key.split())
            if _row[1].data_type == cell.Cell.TYPE_STRING:
                _value = _row[1].value.encode('utf-8')
                _value = ''.join(_value.split())
            _db.Put(_key, _value)
            total += 1
    _wb.close()
    return total
def test_xls_export_works_with_unicode(self):
    survey = create_survey_from_xls(_logger_fixture_path(
        'childrens_survey_unicode.xls'))
    export_builder = ExportBuilder()
    export_builder.set_survey(survey)
    temp_xls_file = NamedTemporaryFile(suffix='.xlsx')
    export_builder.to_xls_export(temp_xls_file.name, self.data_utf8)
    temp_xls_file.seek(0)
    # check that values for red\u2019s and blue\u2019s are set to true
    wb = load_workbook(temp_xls_file.name)
    children_sheet = wb.get_sheet_by_name("children.info")
    data = dict([(r[0].value, r[1].value) for r in children_sheet.columns])
    self.assertTrue(data[u'children.info/fav_colors/red\u2019s'])
    self.assertTrue(data[u'children.info/fav_colors/blue\u2019s'])
    self.assertFalse(data[u'children.info/fav_colors/pink\u2019s'])
    temp_xls_file.close()
def test_to_xls_export_respects_custom_field_delimiter(self):
    survey = self._create_childrens_survey()
    export_builder = ExportBuilder()
    export_builder.GROUP_DELIMITER = ExportBuilder.GROUP_DELIMITER_DOT
    export_builder.set_survey(survey)
    xls_file = NamedTemporaryFile(suffix='.xls')
    filename = xls_file.name
    export_builder.to_xls_export(filename, self.data)
    xls_file.seek(0)
    wb = load_workbook(filename)
    # check header columns
    main_sheet = wb.get_sheet_by_name('childrens_survey')
    expected_column_headers = [
        u'name', u'age', u'geo.geolocation', u'geo._geolocation_latitude',
        u'geo._geolocation_longitude', u'geo._geolocation_altitude',
        u'geo._geolocation_precision', u'tel.tel.office',
        u'tel.tel.mobile', u'_id', u'meta.instanceID', u'_uuid',
        u'_submission_time', u'_index', u'_parent_index',
        u'_parent_table_name', u'_tags', '_notes']
    column_headers = [c[0].value for c in main_sheet.columns]
    self.assertEqual(sorted(column_headers),
                     sorted(expected_column_headers))
    xls_file.close()
def test_to_xls_export_generates_valid_sheet_names(self):
    survey = create_survey_from_xls(_logger_fixture_path(
        'childrens_survey_with_a_very_long_name.xls'))
    export_builder = ExportBuilder()
    export_builder.set_survey(survey)
    xls_file = NamedTemporaryFile(suffix='.xls')
    filename = xls_file.name
    export_builder.to_xls_export(filename, self.data)
    xls_file.seek(0)
    wb = load_workbook(filename)
    # check that we have childrens_survey, children, children_cartoons
    # and children_cartoons_characters sheets
    expected_sheet_names = ['childrens_survey_with_a_very_lo',
                            'childrens_survey_with_a_very_l1',
                            'childrens_survey_with_a_very_l2',
                            'childrens_survey_with_a_very_l3']
    self.assertEqual(wb.get_sheet_names(), expected_sheet_names)
    xls_file.close()
def test_child_record_parent_table_is_updated_when_sheet_is_renamed(self):
    survey = create_survey_from_xls(_logger_fixture_path(
        'childrens_survey_with_a_very_long_name.xls'))
    export_builder = ExportBuilder()
    export_builder.set_survey(survey)
    xls_file = NamedTemporaryFile(suffix='.xlsx')
    filename = xls_file.name
    export_builder.to_xls_export(filename, self.long_survey_data)
    xls_file.seek(0)
    wb = load_workbook(filename)
    # get the children's sheet
    ws1 = wb.get_sheet_by_name('childrens_survey_with_a_very_l1')
    # parent_table is in cell K2
    parent_table_name = ws1.cell('K2').value
    expected_parent_table_name = 'childrens_survey_with_a_very_lo'
    self.assertEqual(parent_table_name, expected_parent_table_name)
    # get cartoons sheet
    ws2 = wb.get_sheet_by_name('childrens_survey_with_a_very_l2')
    parent_table_name = ws2.cell('G2').value
    expected_parent_table_name = 'childrens_survey_with_a_very_l1'
    self.assertEqual(parent_table_name, expected_parent_table_name)
    xls_file.close()
def build(self):
    import openpyxl
    tf = None
    if not tf:
        defaultlocation = os.path.dirname(os.path.abspath(__file__))+os.sep+"sources"+os.sep+"sinapi.xlsx"
        if os.path.exists(defaultlocation):
            print "building from ",defaultlocation
            tf = defaultlocation
        else:
            tf = self.download()
    if not tf:
        return
    f = openpyxl.load_workbook(tf)
    self.descriptions = []
    self.values = []
    self.units = []
    self.codes = []
    sh = f.get_sheet_by_name(f.get_sheet_names()[0])
    for i in range(1,sh.max_row):
        r = sh.rows[i]
        self.codes.append(r[0].value)
        self.descriptions.append(r[1].value)
        self.units.append(r[2].value)
        self.values.append(r[3].value)
    print "parsed data: ",len(self.codes),"/",len(self.descriptions),"/",len(self.values),"/",len(self.units)
def load_excel():
    wb = openpyxl.load_workbook(filename=excel_file)
    ws = wb['data']
    # Parse data from Excel file
    dict_list = []
    keys = {'A': 'Id', 'B': 'Full Name', 'C': 'Email', 'D': 'Phone'}
    for row in ws.iter_rows(min_row=2, max_col=4):
        d = {}
        for cell in row:
            if cell.column == 'A' and cell.value == None:
                break
            elif cell.column != 'A' and cell.value == None:
                d[keys[cell.column]] = ''
            else:
                d[keys[cell.column]] = str(cell.value)
        if len(d.keys()) != 0:
            dict_list.append(d)
    return dict_list
# Update lead in excel worksheet and working dictionary
# Removing from excel leaves a blank row in the excel
def remove_lead(customer_list, customer_id):
    wb = openpyxl.load_workbook(filename=excel_file)
    ws = wb['data']
    for row in ws.iter_rows(min_row=2, max_col=4):
        for cell in row:
            if cell.value == None or cell.column == 'B' or cell.column == 'C' or cell.column == 'D':
                continue
            elif cell.column == 'A' and str(cell.value) == customer_id:
                cell_row = str(cell.row)
                cell.value = None
                ws['B' + cell_row].value = None
                ws['C' + cell_row].value = None
                ws['D' + cell_row].value = None
                break
    wb.save(filename=excel_file)
    # Remove selected lead from excel sheet
    for customer in customer_list:
        if customer['Id'] == customer_id:
            customer_list.pop(customer_list.index(customer))
    return customer_list
def write_data(file_name, data, period):
    ''' Method used for writing data into .xls file '''
    wb = openpyxl.load_workbook(filename=file_name)
    sheet = wb['Vulnerabilities']
    number = 1
    for row_data in data:
        if is_in_between(period, row_data['date']):
            values = []
            values.append(number)
            values.append(row_data['val_name'])
            values.append(row_data['severity'])
            values.append(row_data['date'])
            values.append(row_data['description'])
            values.append(row_data['affected'])
            values.append(row_data['solution'])
            values.append(row_data['link'])
            for index, value in enumerate(values, start=1):
                sheet.cell(row=number+10, column=index+1).value = value
            number += 1
    wb.save(filename=file_name)
def _process_xlsx(self, datafile):
    with warnings.catch_warnings():
        warnings.simplefilter('ignore')
        wb = load_workbook(datafile, read_only=True, data_only=True)
    ws = wb.active
    num_rows = ws.max_row
    num_cols = ws.max_column
    new_file = UploadedFile(
        filename=self.upload_file_form.cleaned_data['marketing_file'].name,
        uploaded_by=self.request.user,
        num_columns=num_cols
    )
    new_file.save()
    self.uploaded_file = new_file
    for row_num in range(1, num_rows+1):
        if self._xlsx_row_is_not_blank(ws, row_num, num_cols):
            self._add_xlsx_row_to_db(ws, row_num, num_cols)
def __init__(self, file=None, template_styles=None, timestamp=None, templated_sheets=None):
    super().__init__()
    self.workbook = load_workbook(filename=file) if file else Workbook()
    self.template_styles = template_styles or DefaultStyleSet()
    self.timestamp = timestamp
    self.templated_sheets = []
    for sheetname, templated_sheet in self._items.items():
        self.add_templated_sheet(templated_sheet, sheetname=sheetname, add_to_self=False)
    for templated_sheet in templated_sheets or []:
        self.add_templated_sheet(sheet=templated_sheet, sheetname=templated_sheet.sheetname, add_to_self=True)
    self._validate()
def test_write_workbook(datadir, tmpdir):
    datadir.chdir()
    src = ZipFile("book1.xlsx")
    orig_files = set(src.namelist())
    src.close()
    from openpyxl import load_workbook
    wb = load_workbook("book1.xlsx")
    tmpdir.chdir()
    wb.save("book1.xlsx")
    src = ZipFile("book1.xlsx")
    out_files = set(src.namelist())
    src.close()
    # remove files from archive that the other can't have
    out_files.discard("xl/sharedStrings.xml")
    orig_files.discard("xl/calcChain.xml")
def render_xlsx(self, outfd, data):
    wb = Workbook(optimized_write = True)
    ws = wb.create_sheet()
    ws.title = 'Timeline Output'
    header = ["Time", "Type", "Item", "Details", "Reason"]
    ws.append(header)
    total = 1
    for line in data:
        coldata = line.split("|")
        ws.append(coldata)
        total += 1
    wb.save(filename = self._config.OUTPUT_FILE)
    if self._config.HIGHLIGHT != None:
        wb = load_workbook(filename = self._config.OUTPUT_FILE)
        ws = wb.get_sheet_by_name(name = "Timeline Output")
        for col in xrange(1, len(header) + 1):
            ws.cell("{0}{1}".format(get_column_letter(col), 1)).style.font.bold = True
        for row in xrange(2, total + 1):
            for col in xrange(2, len(header)):
                if ws.cell("{0}{1}".format(get_column_letter(col), row)).value in self.suspicious.keys():
                    self.fill(ws, row, len(header) + 1, self.suspicious[ws.cell("{0}{1}".format(get_column_letter(col), row)).value]["color"])
                    ws.cell("{0}{1}".format(get_column_letter(col + 1), row)).value = self.suspicious[ws.cell("{0}{1}".format(get_column_letter(col), row)).value]["reason"]
        wb.save(filename = self._config.OUTPUT_FILE)
def writeExcel(poxy_list):
    """
    Append the scraped proxies to the Excel file.
    """
    pata = '/Users/wangjiacan/Desktop/shawn/????/agentPool.xlsx'
    title = ['ip', '??', '????',]
    if not os.path.exists(pata):
        workbook = openpyxl.Workbook()
        sheet = workbook.active
        sheet["A1"].value = title[0]
        sheet["B1"].value = title[1]
        sheet["C1"].value = title[2]
        workbook.save(pata)
    page_workbook = openpyxl.load_workbook(pata)
    page_sheet = page_workbook.get_sheet_by_name(page_workbook.get_sheet_names()[0])
    row = page_sheet.max_row
    for i in range(len(poxy_list)):
        page_sheet["A%d" % (row + i + 1)].value = poxy_list[i]['ip']
        page_sheet["B%d" % (row + i + 1)].value = poxy_list[i]['??']
        page_sheet["C%d" % (row + i + 1)].value = poxy_list[i]['????']
    page_workbook.save(pata)
def readExcel(proxyInfo_list):
    pata = '/Users/wangjiacan/Desktop/shawn/????/agentPool.xlsx'
    page_workbook = openpyxl.load_workbook(pata)
    page_sheet = page_workbook.get_sheet_by_name(page_workbook.get_sheet_names()[0])
    for row in page_sheet.rows:
        proxyData = {'ip': '', '??': '', '????': '', }
        ip = row[0].value
        httpType = row[1].value
        checkDtime = row[2].value
        proxyData["ip"] = ip
        proxyData["??"] = httpType
        proxyData["????"] = checkDtime
        proxyInfo_list.append(proxyData)
    page_workbook.save(pata)
def writrProxyExcel(proxyInfo_list):
    pata = '/Users/wangjiacan/Desktop/shawn/????/agentPool.xlsx'
    page_workbook = openpyxl.load_workbook(pata)
    # Remove the existing sheet.
    page_workbook.remove_sheet(page_workbook.get_sheet_by_name(page_workbook.get_sheet_names()[0]))
    # Create a new sheet.
    new_sheet = page_workbook.create_sheet()
    for i in range(len(proxyInfo_list)):
        new_sheet["A%d" % (i + 1)].value = proxyInfo_list[i]['ip']
        new_sheet["B%d" % (i + 1)].value = proxyInfo_list[i]['??']
        new_sheet["C%d" % (i + 1)].value = proxyInfo_list[i]['????']
    page_workbook.save(pata)
def readExcel(proxyInfo_list):
    """
    Read the proxy IPs from the Excel file.
    """
    pata = '/Users/wangjiacan/Desktop/shawn/????/agentPool.xlsx'
    page_workbook = openpyxl.load_workbook(pata)
    page_sheet = page_workbook.get_sheet_by_name(page_workbook.get_sheet_names()[0])
    for row in page_sheet.rows:
        proxyData = {'ip': '', '??': '', '????': '', }
        ip = row[0].value
        httpType = row[1].value
        checkDtime = row[2].value
        proxyData["ip"] = ip
        proxyData["??"] = httpType
        proxyData["????"] = checkDtime
        proxyInfo_list.append(proxyData)
    page_workbook.save(pata)
def __init__(self, output=None, **kwargs):
    super(XLSRenderer, self).__init__(**kwargs)
    # Make a single delegate text renderer for reuse. Most of the time we
    # will just replicate the output from the TextRenderer inside the
    # spreadsheet cell.
    self.delegate_text_renderer = text.TextRenderer(session=self.session)
    self.output = output or self.session.GetParameter("output")
    # If no output filename was given, just make a name based on the time
    # stamp.
    if self.output == None:
        self.output = "%s.xls" % time.ctime()
    try:
        self.wb = openpyxl.load_workbook(self.output)
        self.current_ws = self.wb.create_sheet()
    except IOError:
        self.wb = openpyxl.Workbook()
        self.current_ws = self.wb.active
def excel_parser(file = 'test.xlsx', sheel_name = 'Sheet1'):
    data = load_workbook(file)
    table = data[sheel_name]
    print('%-22s %-22s %s' % (table['A1'].value, table['B1'].value, table['C1'].value))
    #print(table.rows)
    row_location = 0
    for row in table.iter_rows():
        if row_location == 0:
            row_location += 1
            continue
        else:
            cell_location = 0
            for cell in row:
                if cell_location == 0:
                    print('??:%-20s' % cell.value, end='')
                    cell_location += 1
                elif cell_location == 1:
                    print('??:%-20s' % cell.value, end='')
                    cell_location += 1
                elif cell_location == 2:
                    print('??:%-20s' % cell.value, end='')
                    cell_location += 1
            print()
            row_location += 1
def read_excel(excel_path):
    candidate_list = list()
    wb = load_workbook(excel_path)
    ws = wb.active
    for i in range(2, ws.max_row - 1):  # -1 means that the last row is null
        candidate = Candidate(uid=ws.cell(row=i, column=1).value, nickname=ws.cell(row=i, column=2).value,
                              age=ws.cell(row=i, column=3).value, height=ws.cell(row=i, column=4).value,
                              image=ws.cell(row=i, column=5).value, marriage=ws.cell(row=i, column=6).value,
                              education=ws.cell(row=i, column=7).value, work_location=ws.cell(row=i, column=8).value,
                              work_sublocation=ws.cell(row=i, column=9).value,
                              shortnote=ws.cell(row=i, column=10).value,
                              matchCondition=ws.cell(row=i, column=11).value,
                              randListTag=ws.cell(row=i, column=12).value,
                              province=ws.cell(row=i, column=13).value, gender=ws.cell(row=i, column=14).value)
        candidate_list.append(candidate)
        # print(candidate)
    return candidate_list
def test_xls_export_works_with_unicode(self):
    survey = create_survey_from_xls(_logger_fixture_path(
        'childrens_survey_unicode.xls'))
    export_builder = ExportBuilder()
    export_builder.set_survey(survey)
    temp_xls_file = NamedTemporaryFile(suffix='.xlsx')
    export_builder.to_xls_export(temp_xls_file.name, self.data_utf8)
    temp_xls_file.seek(0)
    # check that values for red\u2019s and blue\u2019s are set to true
    wb = load_workbook(temp_xls_file.name)
    children_sheet = wb.get_sheet_by_name("children.info")
    data = dict([(r[0].value, r[1].value) for r in children_sheet.columns])
    self.assertTrue(data[u'children.info/fav_colors/red\u2019s'])
    self.assertTrue(data[u'children.info/fav_colors/blue\u2019s'])
    self.assertFalse(data[u'children.info/fav_colors/pink\u2019s'])
    temp_xls_file.close()
def test_to_xls_export_respects_custom_field_delimiter(self):
    survey = self._create_childrens_survey()
    export_builder = ExportBuilder()
    export_builder.GROUP_DELIMITER = ExportBuilder.GROUP_DELIMITER_DOT
    export_builder.set_survey(survey)
    xls_file = NamedTemporaryFile(suffix='.xls')
    filename = xls_file.name
    export_builder.to_xls_export(filename, self.data)
    xls_file.seek(0)
    wb = load_workbook(filename)
    # check header columns
    main_sheet = wb.get_sheet_by_name('childrens_survey')
    expected_column_headers = [
        u'name', u'age', u'geo.geolocation', u'geo._geolocation_latitude',
        u'geo._geolocation_longitude', u'geo._geolocation_altitude',
        u'geo._geolocation_precision', u'tel.tel.office',
        u'tel.tel.mobile', u'_id', u'meta.instanceID', u'_uuid',
        u'_submission_time', u'_index', u'_parent_index',
        u'_parent_table_name', u'_tags', '_notes', '_version']
    column_headers = [c[0].value for c in main_sheet.columns]
    self.assertEqual(sorted(column_headers),
                     sorted(expected_column_headers))
    xls_file.close()
def test_to_xls_export_generates_valid_sheet_names(self):
    survey = create_survey_from_xls(_logger_fixture_path(
        'childrens_survey_with_a_very_long_name.xls'))
    export_builder = ExportBuilder()
    export_builder.set_survey(survey)
    xls_file = NamedTemporaryFile(suffix='.xls')
    filename = xls_file.name
    export_builder.to_xls_export(filename, self.data)
    xls_file.seek(0)
    wb = load_workbook(filename)
    # check that we have childrens_survey, children, children_cartoons
    # and children_cartoons_characters sheets
    expected_sheet_names = ['childrens_survey_with_a_very_lo',
                            'childrens_survey_with_a_very_l1',
                            'childrens_survey_with_a_very_l2',
                            'childrens_survey_with_a_very_l3']
    self.assertEqual(wb.get_sheet_names(), expected_sheet_names)
    xls_file.close()
def test_child_record_parent_table_is_updated_when_sheet_is_renamed(self):
    survey = create_survey_from_xls(_logger_fixture_path(
        'childrens_survey_with_a_very_long_name.xls'))
    export_builder = ExportBuilder()
    export_builder.set_survey(survey)
    xls_file = NamedTemporaryFile(suffix='.xlsx')
    filename = xls_file.name
    export_builder.to_xls_export(filename, self.long_survey_data)
    xls_file.seek(0)
    wb = load_workbook(filename)
    # get the children's sheet
    ws1 = wb.get_sheet_by_name('childrens_survey_with_a_very_l1')
    # parent_table is in cell K2
    parent_table_name = ws1.cell('K2').value
    expected_parent_table_name = 'childrens_survey_with_a_very_lo'
    self.assertEqual(parent_table_name, expected_parent_table_name)
    # get cartoons sheet
    ws2 = wb.get_sheet_by_name('childrens_survey_with_a_very_l2')
    parent_table_name = ws2.cell('G2').value
    expected_parent_table_name = 'childrens_survey_with_a_very_l1'
    self.assertEqual(parent_table_name, expected_parent_table_name)
    xls_file.close()
def check_noequations(self):
    """
    Verify that no column in a sheet contains an equation
    Based on checking every cell in the 4th row

    Keyword arguments:
    sheet -- sheet object from workbook
    """
    wb_open = load_workbook(self.path, read_only = True)
    sheet = wb_open.get_sheet_by_name(self.name)
    for i in range(1, len(self.header_entries)):
        for j in range(1, 5):
            value = sheet.cell(row = j, column = i).value
            # equation check logic, TODO might be better code out there
            if (value and isinstance(value, str) and value[0] == "="):
                raise AMIExcelError("Cell R4C{0} contain equations."
                                    .format(i))
    return True
def main():
    parser = _make_parser()
    args = parser.parse_args()
    _configure_logging(args)
    if args.excel:
        excel = ami_excel(args.excel)
        if excel and excel.validate_workbook():
            LOGGER.info("{}: valid".format(args.excel))
        else:
            if args.output:
                wb = load_workbook(args.excel, data_only = True)
                wb.save(args.output)
                new_excel = ami_excel(args.output)
                if new_excel.validate_workbook():
                    LOGGER.info("{}: valid".format(args.output))
                else:
                    LOGGER.error("{}: invalid".format(args.output))
def parse_statistics(logfile):
    xl = pd.ExcelFile(logfile)
    df = xl.parse("Sheet")
    df = df.sort_values(by='Line Numbers')
    writer = pd.ExcelWriter(logfile)
    df.to_excel(writer, sheet_name='Sheet', index=False)
    writer.save()
    wb = openpyxl.load_workbook(logfile)
    ws = wb.active
    row_count = ws.max_row
    column_count = ws.max_column
    chart = ScatterChart()
    chart.title = "Time upload domain names"
    chart.style = 13
    chart.x_axis.title = "Line numbers"
    chart.y_axis.title = "Time, sec"
    xvalues = Reference(ws, min_col=1, min_row=2, max_row=row_count)
    color_choice = ['3F888F', 'D24D57']
    for i in range(2, column_count + 1):
        values = Reference(ws, min_col=i, min_row=1, max_row=row_count)
        series = Series(values, xvalues, title_from_data=True)
        series.marker.symbol = "diamond"
        series.graphicalProperties.line.solidFill = color_choice[i-2]
        series.marker.graphicalProperties.line.solidFill = color_choice[i-2]
        series.marker.graphicalProperties.solidFill = color_choice[i-2]
        series.graphicalProperties.line.width = 20000
        chart.series.append(series)
    chart.legend.legendPos = 'b'
    ws.add_chart(chart)
    wb.save(logfile)
def __init__(self):
    # self._sheet1_count = 1
    # self._sheet2_count = 1
    # self._sheet2_movie_count = 0
    # Open the existing workbook.
    self._xlsx_wb = load_workbook(filename='data.xlsx')
    sheetnames = self._xlsx_wb.get_sheet_names()
    self._xlsx_ws1 = self._xlsx_wb.get_sheet_by_name(sheetnames[0])
    self._xlsx_ws2 = self._xlsx_wb.get_sheet_by_name(sheetnames[1])
    # self._xlsx_wb = Workbook()
    # self._xlsx_ws1 = self._xlsx_wb.active
    # self._xlsx_ws1.title = 'TOP250'
    # self._xlsx_ws1['A1'] = u'??'
    # self._xlsx_ws1['B1'] = u'????'
    # self._xlsx_ws1['C1'] = u'??'
    # self._xlsx_ws1['D1'] = u'??'
    # self._xlsx_ws1['E1'] = u'??'
    # self._xlsx_ws1['F1'] = u'??'
    # self._xlsx_ws2 = self._xlsx_wb.create_sheet(u'????')
    # self._xlsx_ws2['A1'] = u'????'
    # self._xlsx_ws2['B1'] = u'????'
    # self._xlsx_ws2['C1'] = u'????'
    # self._xlsx_ws2['D1'] = u'???'
    # self._xlsx_ws2['E1'] = u'ID?'
    # self._xlsx_ws2['F1'] = u'????'
def on_importXLSX_clicked(self):
    fileName, _ = QFileDialog.getOpenFileName(self, 'Open file...', self.env, "Microsoft Office Excel(*.xlsx *.xlsm *.xltx *.xltm)")
    if fileName:
        wb = openpyxl.load_workbook(fileName)
        ws = wb.get_sheet_by_name(wb.get_sheet_names()[0])
        data = list()
        i = 1
        while True:
            x = ws.cell(row=i, column=1).value
            y = ws.cell(row=i, column=2).value
            if x==None or y==None:
                break
            try:
                data.append((round(float(x), 4), round(float(y), 4)))
            except:
                dlgbox = QMessageBox(QMessageBox.Warning, "File error", "Wrong format.\nThe datasheet seems to including non-digital cell.", (QMessageBox.Ok), self)
                if dlgbox.exec_():
                    break
            i += 1
        for e in data:
            self.on_add_clicked(e[0], e[1])
def __init__(self, excel_file, sheet_name='SWITCHES', variable_name_row=1,
             key_column=1, template_column=2, variable_start_column=3):
    ''' Loads the excel configuration file '''
    try:
        # TODO: Check if file is locked, if so wait and try to open N
        # more times after N seconds.
        self._wb = load_workbook(excel_file, data_only=True)
        self._filename = excel_file
    except:
        raise Exception("Could not load spreadsheet '%s'" % excel_file)
    self.set_excel_sheet(sheet_name)
    self.set_variable_name_row(variable_name_row)
    self.set_template_column(template_column)
    self.set_data_start_row(variable_name_row + 1)
    self.set_key_column(key_column)
    self.set_variable_start_column(variable_start_column)
def parse_data(data):
    cols = ['name', "age", "gp", "w", "l", "min", "pts", "fgm", "fga",
            "fg%", "3pm", "3pa", "ftm", "fta", "ft%", "oreb", "dreb", "reb",
            "ast", "tov", "stl", "blk", "pf", "dd2", "td3"]
    stat_col = cols.index(data)
    player_col = 0
    wb = load_workbook("nbastats.xlsx")
    ws = wb['Sheet1']
    for row in ws.values:
        yield row[player_col].lower(), row[stat_col]
def save_xls(self, dframe):  # Save the DataFrame to Excel; if the file already exists, append it as a new sheet.
    xls_path = os.path.join(current_folder, self.name + '.xlsx')
    if os.path.exists(xls_path):  # The Excel file already exists.
        book = load_workbook(xls_path)
        writer = pd.ExcelWriter(xls_path, engine='openpyxl')
        writer.book = book
        writer.sheets = dict((ws.title, ws) for ws in book.worksheets)
        dframe.to_excel(writer, self.name)
        writer.save()
    else:  # The file does not exist yet.
        writer = ExcelWriter(xls_path)
        dframe.to_excel(writer, self.name)
        writer.save()
def __init__(self, ExcelPath):
    self.path = ExcelPath
    if not os.path.isfile(ExcelPath):  # Check whether the file exists.
        self.stat = False
        # The file does not exist; create a new workbook.
        self.wb = openpyxl.Workbook()
    else:
        self.stat = True
        self.wb = openpyxl.load_workbook(ExcelPath)
def import_openpyxl(self):
    """Import the openpyxl module """
    try:
        import openpyxl
        from openpyxl import load_workbook
    except ImportError:
        print("openpyxl module is not installed. "\
              "Please install openpyxl module to "\
              "perform any activities related to parsing xl sheets")
    else:
        self.openpyxl = openpyxl
        self.load = load_workbook
def load_workbook(self, wb_location):
    """
    Load an existing xl workbook
    """
    value = None
    try:
        wb = self.load(wb_location)
    except Exception as err:
        print(err)
        print("Error loading workbook, check if file exists")
    else:
        value = wb
    return value
def extract_part_fields_from_xlsx(filename, inc_field_names=None, exc_field_names=None, recurse=False):
    '''Return a dictionary of part fields extracted from an XLSX spreadsheet.'''
    logger.log(DEBUG_OVERVIEW,
               'Extracting fields {}, -{} from XLSX file {}.'.format(inc_field_names, exc_field_names,
                                                                     filename))
    try:
        wb = pyxl.load_workbook(filename, data_only=True)
        return extract_part_fields_from_wb(wb, inc_field_names, exc_field_names)
    except FieldExtractionError:
        logger.warn('Field extraction failed on {}.'.format(filename))
        return {}
def mainWork(absent):
    absent = list(map(int,absent.split(' ')))
    lec_times = ('7:50','8:50','10:00','11:00','13:00','14:00','15:10')
    strength = 60  # Get from database
    subject = 'sdl'  # Get subject code from database
    cl_division = 'TEA'  # Get class and division from database
    lec_number = 3  # Needs better way to get
    book = opx.load_workbook(cl_division+'.xlsx')
    sheet = book.get_sheet_by_name(subject)
    curr_col = 1
    while sheet[get_column_letter(curr_col)+'1'].value != None:
        curr_col += 1
    curr_col = get_column_letter(curr_col)
    sheet[curr_col+'1'].value = str(time.localtime().tm_mday) + '-' + str(time.localtime().tm_mon) + '-' + str(time.localtime().tm_year)
    for i in range(1, strength+1):
        if i in absent:
            sheet[curr_col+str(i+1)].value = 'A'
        else:
            sheet[curr_col+str(i+1)].value = 'P'
        print('Marked!', str(i))
    sheet[curr_col+str(strength+3)].value = '%.2f' % (((strength-len(absent))/strength) * 100)
    sheet[curr_col+str(strength+3)].value += '%'
    book.save(cl_division+'.xlsx')
def setUp(self):
    report_name = "../test-data-set/test.xlsx"
    self.report = rCMS.ComissionXLSX(report_name)
    dataset = DataSet()
    self.report.add_data(dataset.core_details, dataset.plugins, dataset.themes)
    self.report.generate_xlsx()
    self.workbook = load_workbook(report_name)
def test03_transform_filter():
    base_output = "output_fctl_data_dict_filtered"
    outputs = {}
    extensions = ['html', 'xlsx', 'tsv', 'md']
    for ext in extensions:
        outputs[ext] = "{}.{}".format(base_output, ext)
    namespace = os.path.join(TEST_DIR, 'resources', 'input', 'namespace.json')
    exp = os.path.join(TEST_DIR, 'resources', 'expected', 'data_dict_filtered')
    argv = ['transform', SCHEMA_FILE, '--output', base_output,
            '--filter', namespace, '--columns',
            'Field_compact_name', 'Field_name', 'Full_name', 'Description', 'Count', 'Percentage',
            'Types_count',
            '--formats'] + extensions
    main(argv)
    assert filecmp.cmp(outputs['tsv'], "{}.tsv".format(exp))
    assert filecmp.cmp(outputs['md'], "{}.md".format(exp))
    with open(outputs['html']) as out_fd, \
            open("{}.html".format(exp)) as exp_fd:
        assert out_fd.read().replace(' ', '') == exp_fd.read().replace(' ', '')
    res = [cell.value for row in load_workbook(outputs['xlsx']).active for cell in row]
    exp = [cell.value for row in load_workbook("{}.xlsx".format(exp)).active for cell in row]
    assert res == exp
    for output in outputs.values():
        os.remove(output)