我创建了一个将 YAML 文件解析为 Excel 的脚本。我编写了部分代码来调整列宽,但现在我想为Desciprtion列换行。
脚本执行后:
我想要的是:
这是我的代码:
import yaml import os import pandas as pd from openpyxl.utils import get_column_letter import sys def yaml_to_excel(dictionary, row): for key, value in dictionary.items(): if type(value) is dict: yield from yaml_to_excel(value, row + 1) elif type(value) is list: for item in value: yield from yaml_to_excel(item, row + 1) row += 1 else: yield (key, value, row) def get_all_subdirectiries_in_path(root_dir): all_subdirectories = [] for file in os.listdir(root_dir): d = os.path.join(root_dir, file) if os.path.isdir(d): all_subdirectories.append(d) return all_subdirectories def dfs_tabs(df_list, sheet_list, file_name, columns): writer = pd.ExcelWriter(file_name,engine='openpyxl') for dataframe, sheet, columns_list in zip(df_list, sheet_list, columns): dataframe.to_excel(writer, sheet_name=sheet, startrow=0 , startcol=0, index=False, columns=columns_list) for column in dataframe: column_width = max(dataframe[column].astype(str).map(len).max() + 5, len(column) + 5) col_letter = get_column_letter(dataframe.columns.get_loc(column) + 1) writer.sheets[sheet].column_dimensions[col_letter].width = column_width writer.save() def get_nested_dir_path(main_dir): if any(File.endswith(".yml") for File in os.listdir(main_dir)): yield main_dir else: for file in os.listdir(main_dir): yield from get_nested_dir_path(main_dir + f"/{file}") def main(dir_path): all_sheets = [] sheet_names = [] all_columns = [] for subdirectory in get_nested_dir_path(dir_path): final_object = [] last_row = 1 current_row = 1 for file in os.listdir(subdirectory): # Check if YAML file exist if (file.split(".")[1] == "yml") | (file.split(".")[1] == "yaml"): with open(subdirectory + f"/{file}", "r", encoding="utf-8") as f: dataMap = yaml.safe_load( f, ) last_row += 1 hierarchy_obj = {} hierarchy = 1 prev_value = "" prev_temp_depth = 2 current_row += 1 starting_row = current_row for key, value, temp_depth in yaml_to_excel(dataMap, last_row): if key not in hierarchy_obj: hierarchy_obj[key] = hierarchy hierarchy += 1 if prev_value != "": if hierarchy_obj[key] < hierarchy_obj[prev_value]: current_row += 1 if (temp_depth > prev_temp_depth) & ( hierarchy_obj[key] > hierarchy_obj[prev_value] ): current_row += 1 if type(value) is bool: if value: value = 'yes' else: value = 'no' final_object.append( { "column": hierarchy_obj[key], "value": value, "row": current_row, } ) prev_value = key prev_temp_depth = temp_depth columns_arr = [] for column_name in hierarchy_obj.keys(): columns_arr.append(column_name) all_columns.append(columns_arr) last_row_in_excel = final_object[len(final_object)-1]['row'] main_arr = [] for i in range(last_row_in_excel): main_arr.append(["" for i in range(len(columns_arr))]) main_arr = [main_arr] for item in final_object: if (type(item['value']) == int) | (type(item['value']) == float): main_arr[0][item['row'] - 1][item['column'] - 1] = item['value'] else: main_arr[0][item['row'] - 1][item['column'] -1] = item['value'] result = pd.DataFrame(main_arr[0], columns = columns_arr) result = result[1:] all_sheets.append(result) sheet_names.append(subdirectory[subdirectory.rfind('/')+1 :].replace("/", " ").replace("_", " ")[0:31]) dfs_tabs(all_sheets, sheet_names, 'test.xlsx', all_columns) if __name__ == "__main__": dir_path = 'Lands' main(dir_path)
下面您可以找到我所基于的 YAML 文件:
Country: France Capital City: Paris Language: French Description: |- A country whose metropolitan part is in Western Europe, and also has overseas territories on other continents. The most important people: President: Emmanuel Macron Prime Minister: Élisabeth Borne Year: 2020 Population: - Total population: 67 522 000 Year: 2021 Nationality: - French population: 87% German population: 2% Year: 2022 Paris neighborhoods: - 1st district of Paris: Louvre Paris II district: Bourse Year: 2023 Additional information:
要运行我的脚本,您必须创建文件夹Lands/France/france.yml
我将非常感激有关如何在“描述”栏中换行文字的任何提示!
要在 Excel 中使 Description 列的内容换行,可以通过设置单元格的 自动换行 功能来实现。您可以使用 openpyxl 直接设置单元格的属性。下面是您修改代码的相关部分,添加了自动换行的功能。
Description
openpyxl
dfs_tabs
alignment
import yaml import os import pandas as pd from openpyxl.utils import get_column_letter from openpyxl.styles import Alignment import sys def yaml_to_excel(dictionary, row): for key, value in dictionary.items(): if type(value) is dict: yield from yaml_to_excel(value, row + 1) elif type(value) is list: for item in value: yield from yaml_to_excel(item, row + 1) row += 1 else: yield (key, value, row) def get_all_subdirectiries_in_path(root_dir): all_subdirectories = [] for file in os.listdir(root_dir): d = os.path.join(root_dir, file) if os.path.isdir(d): all_subdirectories.append(d) return all_subdirectories def dfs_tabs(df_list, sheet_list, file_name, columns): writer = pd.ExcelWriter(file_name, engine='openpyxl') for dataframe, sheet, columns_list in zip(df_list, sheet_list, columns): dataframe.to_excel(writer, sheet_name=sheet, startrow=0, startcol=0, index=False, columns=columns_list) # Get the sheet object from the writer worksheet = writer.sheets[sheet] for column in dataframe: column_width = max(dataframe[column].astype(str).map(len).max() + 5, len(column) + 5) col_letter = get_column_letter(dataframe.columns.get_loc(column) + 1) worksheet.column_dimensions[col_letter].width = column_width # Set wrap text for 'Description' column (or any other column you need) if column == 'Description': # This assumes 'Description' is one of the columns for row in worksheet.iter_rows(min_row=2, max_row=len(dataframe) + 1, min_col=1, max_col=1): for cell in row: cell.alignment = Alignment(wrap_text=True) writer.save() def get_nested_dir_path(main_dir): if any(File.endswith(".yml") for File in os.listdir(main_dir)): yield main_dir else: for file in os.listdir(main_dir): yield from get_nested_dir_path(main_dir + f"/{file}") def main(dir_path): all_sheets = [] sheet_names = [] all_columns = [] for subdirectory in get_nested_dir_path(dir_path): final_object = [] last_row = 1 current_row = 1 for file in os.listdir(subdirectory): if (file.split(".")[1] == "yml") | (file.split(".")[1] == "yaml"): with open(subdirectory + f"/{file}", "r", encoding="utf-8") as f: dataMap = yaml.safe_load(f) last_row += 1 hierarchy_obj = {} hierarchy = 1 prev_value = "" prev_temp_depth = 2 current_row += 1 starting_row = current_row for key, value, temp_depth in yaml_to_excel(dataMap, last_row): if key not in hierarchy_obj: hierarchy_obj[key] = hierarchy hierarchy += 1 if prev_value != "": if hierarchy_obj[key] < hierarchy_obj[prev_value]: current_row += 1 if (temp_depth > prev_temp_depth) & (hierarchy_obj[key] > hierarchy_obj[prev_value]): current_row += 1 if type(value) is bool: if value: value = 'yes' else: value = 'no' final_object.append( { "column": hierarchy_obj[key], "value": value, "row": current_row, } ) prev_value = key prev_temp_depth = temp_depth columns_arr = [] for column_name in hierarchy_obj.keys(): columns_arr.append(column_name) all_columns.append(columns_arr) last_row_in_excel = final_object[len(final_object)-1]['row'] main_arr = [] for i in range(last_row_in_excel): main_arr.append(["" for i in range(len(columns_arr))]) main_arr = [main_arr] for item in final_object: if (type(item['value']) == int) | (type(item['value']) == float): main_arr[0][item['row'] - 1][item['column'] - 1] = item['value'] else: main_arr[0][item['row'] - 1][item['column'] - 1] = item['value'] result = pd.DataFrame(main_arr[0], columns=columns_arr) result = result[1:] all_sheets.append(result) sheet_names.append(subdirectory[subdirectory.rfind('/') + 1:].replace("/", " ").replace("_", " ")[0:31]) dfs_tabs(all_sheets, sheet_names, 'test.xlsx', all_columns) if __name__ == "__main__": dir_path = 'Lands' main(dir_path)
openpyxl.styles.Alignment(wrap_text=True)
wrap_text=True
Alignment(wrap_text=True)
columns_arr
if column == 'Description'
希望这些修改能解决您的问题,让您在 Excel 中正确显示换行的 Description 列!