是否可以使用 Python 合并单独的 PDF 文件?
假设如此,我需要进一步扩展这一点。我希望循环遍历目录中的文件夹并重复此过程。
我可能有点得寸进尺,但是否有可能在合并时排除每个 PDF 中的某一页(我的报告生成总是会额外创建一个空白页)?
是的,可以使用 Python 中的库(例如 PyPDF2 或 pypdf)合并单独的 PDF 文件。此外,您可以循环遍历目录来合并 PDF 文件并排除特定页面。
PyPDF2
pypdf
下面是如何使用 pypdf(PyPDF2 的改进和维护版本)实现此目的的示例:
首先,安装pypdf库
pip install pypdf
下面是合并文件夹中的 PDF 文件的示例,但不包括最后一页(如果它是空白的)
import os

from pypdf import PdfReader, PdfWriter


def merge_pdfs_in_folder(folder_path, output_path, exclude_last_page=True):
    """Merge every PDF found directly inside ``folder_path`` into one file.

    Args:
        folder_path: Directory whose ``.pdf`` files are merged (sub-folders
            are not searched).
        output_path: Path of the merged PDF to write.
        exclude_last_page: When True (the default), the last page of every
            input PDF is dropped unconditionally -- it is not checked for
            being blank. A one-page PDF therefore contributes no pages.
    """
    pdf_writer = PdfWriter()
    output_abs = os.path.abspath(output_path)

    # os.listdir returns names in arbitrary order; sort so that the page
    # order of the merged document is reproducible between runs.
    for filename in sorted(os.listdir(folder_path)):
        # Match the extension case-insensitively so "REPORT.PDF" is included.
        if not filename.lower().endswith('.pdf'):
            continue

        pdf_path = os.path.join(folder_path, filename)

        # If a previous run wrote the merged file into this same folder,
        # do not feed it back into the new merge.
        if os.path.abspath(pdf_path) == output_abs:
            continue

        pdf_reader = PdfReader(pdf_path)

        # Determine the number of pages, and exclude the last page if needed
        num_pages = len(pdf_reader.pages)
        if exclude_last_page:
            num_pages -= 1  # Exclude last page

        # Add all pages except the last one (if excluded)
        for page_num in range(num_pages):
            pdf_writer.add_page(pdf_reader.pages[page_num])

    # Write the merged PDF to the output file
    with open(output_path, 'wb') as output_pdf:
        pdf_writer.write(output_pdf)

    print(f"PDFs from {folder_path} merged into {output_path}")


# Example usage
folder_to_merge = "path/to/folder"  # Folder containing PDF files
output_pdf = "merged_output.pdf"  # Output merged PDF file
merge_pdfs_in_folder(folder_to_merge, output_pdf)
merge_pdfs_in_folder()
PdfReader
PdfWriter
exclude_last_page
True
如果您想循环遍历多个目录并合并每个目录中的 PDF,您可以像这样修改代码:
def merge_pdfs_in_directories(base_directory, output_directory):
    """Merge the PDFs of every folder under ``base_directory``.

    Walks ``base_directory`` recursively. For each folder that directly
    contains at least one PDF, ``merge_pdfs_in_folder`` is called to write
    ``merged_<folder name>.pdf`` into ``output_directory``.

    Args:
        base_directory: Root directory to walk.
        output_directory: Directory that receives the merged PDFs; it is
            created on demand the first time a merged file is written.

    Note:
        Output names are derived from the folder's base name only, so two
        folders with the same name in different parents write to the same
        output file.
    """
    output_abs = os.path.abspath(output_directory)

    for root, dirs, files in os.walk(base_directory):
        # Prune the output directory from the walk (in-place edit of `dirs`
        # is honoured by os.walk) so earlier results are not merged again
        # when the output directory lives inside the base directory.
        dirs[:] = [
            d for d in dirs
            if os.path.abspath(os.path.join(root, d)) != output_abs
        ]

        # Skip folders without PDFs instead of writing an empty merged file.
        if not any(name.lower().endswith('.pdf') for name in files):
            continue

        os.makedirs(output_directory, exist_ok=True)
        folder_name = os.path.basename(root)
        output_pdf = os.path.join(output_directory, f"merged_{folder_name}.pdf")
        merge_pdfs_in_folder(root, output_pdf)


# Example usage
base_dir = "path/to/main_directory"  # Base directory containing subfolders
output_dir = "path/to/output_directory"  # Where to save the merged PDFs
merge_pdfs_in_directories(base_dir, output_dir)
这将循环遍历 base_directory 里面的所有目录,并将合并后的 PDF 保存到 output_directory。
base_directory
output_directory