是否可以使用 Python 合并单独的 PDF 文件?
假设如此,我需要进一步扩展这一点。我希望循环遍历目录中的文件夹并重复此过程。
我可能有点得寸进尺,但是否有可能排除每个 PDF 中的某些页面(我的报告生成工具总是会额外创建一个空白页)?
是的,可以使用 Python 中的库(例如 PyPDF2 或 pypdf)合并单独的 PDF 文件。此外,您可以循环遍历目录来合并 PDF 文件并排除特定页面。
下面是如何使用 pypdf(PyPDF2 的改进和维护版本)实现此目的的示例:
首先,安装 pypdf 库:
pip install pypdf
下面是合并文件夹中的 PDF 文件的示例,但不包括最后一页(如果它是空白的)
import os
from pypdf import PdfReader, PdfWriter
def merge_pdfs_in_folder(folder_path, output_path, exclude_last_page=True):
    """Merge every PDF found directly in *folder_path* into a single file.

    Args:
        folder_path: Directory whose ``.pdf`` files are merged. Sub-folders
            are not searched.
        output_path: Path of the merged PDF to write.
        exclude_last_page: When true (the default), the final page of each
            input PDF is dropped. This targets reports that always end with
            a blank page; note that a one-page PDF then contributes nothing.

    The merged file is written even if no input pages were collected.
    """
    pdf_writer = PdfWriter()
    # Used to avoid feeding a previous run's output back into the merge when
    # the output file lives inside the folder being merged.
    output_abs = os.path.abspath(output_path)

    # os.listdir() returns names in arbitrary order; sort so the page order
    # of the merged document is reproducible.
    for filename in sorted(os.listdir(folder_path)):
        # Match the extension case-insensitively so "REPORT.PDF" is included.
        if not filename.lower().endswith('.pdf'):
            continue

        pdf_path = os.path.join(folder_path, filename)
        if os.path.abspath(pdf_path) == output_abs:
            continue
        pdf_reader = PdfReader(pdf_path)

        # Determine the number of pages, and exclude the last page if needed
        num_pages = len(pdf_reader.pages)
        if exclude_last_page:
            num_pages -= 1  # Exclude last page

        # Add all pages except the last one (if excluded). range() of a
        # non-positive count is empty, so empty PDFs are handled safely.
        for page_num in range(num_pages):
            pdf_writer.add_page(pdf_reader.pages[page_num])

    # Write the merged PDF to the output file
    with open(output_path, 'wb') as output_pdf:
        pdf_writer.write(output_pdf)

    print(f"PDFs from {folder_path} merged into {output_path}")
# Example usage
folder_to_merge = "path/to/folder"  # Folder containing PDF files
output_pdf = "merged_output.pdf"  # Output merged PDF file
merge_pdfs_in_folder(folder_to_merge, output_pdf)
merge_pdfs_in_folder():该函数使用 PdfReader 读取每个 PDF,并使用 PdfWriter 写入合并后的文件。当 exclude_last_page 参数为 True 时,脚本排除每个 PDF 的最后一页(假设它是空白页)。如果您想循环遍历多个目录并合并每个目录中的 PDF,您可以像这样修改代码:
def merge_pdfs_in_directories(base_directory, output_directory):
    """Merge the PDFs of each folder under *base_directory* into one file.

    Walks *base_directory* recursively (including the base directory itself)
    and, for every folder that directly contains at least one PDF, calls
    ``merge_pdfs_in_folder`` to write ``merged_<folder name>.pdf`` into
    *output_directory*.

    Args:
        base_directory: Root of the directory tree to scan.
        output_directory: Where the merged PDFs are saved. Created if it
            does not exist yet.

    Note:
        Output names are derived from the folder's base name only, so two
        folders with the same name at different depths write to the same
        output file.
    """
    os.makedirs(output_directory, exist_ok=True)
    output_abs = os.path.abspath(output_directory)

    for root, dirs, files in os.walk(base_directory):
        # Editing ``dirs`` in place steers os.walk: visit sub-folders in a
        # stable order and never descend into the output directory, so
        # previously merged files are not merged again.
        dirs[:] = sorted(
            d for d in dirs
            if os.path.abspath(os.path.join(root, d)) != output_abs
        )

        # Skip folders without PDFs instead of writing an empty merged file.
        if not any(name.lower().endswith('.pdf') for name in files):
            continue

        # normpath strips a trailing separator, which would otherwise make
        # basename() return an empty string for the base directory.
        folder_name = os.path.basename(os.path.normpath(root))
        output_pdf = os.path.join(output_directory, f"merged_{folder_name}.pdf")
        merge_pdfs_in_folder(root, output_pdf)
# Example usage
base_dir = "path/to/main_directory"  # Base directory containing subfolders
output_dir = "path/to/output_directory"  # Where to save the merged PDFs
merge_pdfs_in_directories(base_dir, output_dir)
这将循环遍历 base_directory 里面的所有目录,并将每个目录合并后的 PDF 保存到 output_directory。