小能豆

合并 PDF 文件

python

是否可以使用 Python 合并单独的 PDF 文件?

假设如此,我需要进一步扩展这一点。我希望循环遍历目录中的文件夹并重复此过程。

我可能有点得寸进尺,但是否有可能排除每个 PDF 中的某一页(我的报告生成程序总是会多创建一个空白页)?


阅读 18

收藏
2024-09-20

共1个答案

小能豆

是的,可以使用 Python 中的库(例如 PyPDF2 或 pypdf)合并单独的 PDF 文件。此外,您可以循环遍历目录来合并 PDF 文件并排除特定页面。

下面是如何使用 pypdf(PyPDF2 的改进和维护版本)实现此目的的示例。

1.安装pypdf

首先,安装pypdf

pip install pypdf

2.合并 PDF 并排除最后一页

下面是合并文件夹中的 PDF 文件的示例,但不包括最后一页(如果它是空白的)

import os
from pypdf import PdfReader, PdfWriter

def merge_pdfs_in_folder(folder_path, output_path, exclude_last_page=True):
    """Merge every PDF directly inside ``folder_path`` into one output file.

    Args:
        folder_path: Directory whose ``.pdf`` files are merged. Subfolders
            are not searched.
        output_path: Path of the merged PDF to write.
        exclude_last_page: When true, the last page of each input PDF is
            dropped (e.g. a trailing blank page added by a report generator).
            A single-page input therefore contributes no pages.
    """
    pdf_writer = PdfWriter()
    output_abs = os.path.abspath(output_path)

    # os.listdir() returns entries in arbitrary order; sort so that the page
    # order of the merged document is reproducible between runs.
    for filename in sorted(os.listdir(folder_path)):
        # Match the extension case-insensitively so "REPORT.PDF" is included.
        if not filename.lower().endswith('.pdf'):
            continue

        pdf_path = os.path.join(folder_path, filename)
        # A previous merge result stored in the same folder must not be
        # merged into itself when the script is run again.
        if os.path.abspath(pdf_path) == output_abs:
            continue

        pdf_reader = PdfReader(pdf_path)

        # Determine the number of pages, and exclude the last page if needed
        num_pages = len(pdf_reader.pages)
        if exclude_last_page:
            num_pages -= 1  # Exclude last page

        # Add all pages except the last one (if excluded); range() of a
        # non-positive count is simply empty.
        for page_num in range(num_pages):
            pdf_writer.add_page(pdf_reader.pages[page_num])

    # Write the merged PDF to the output file
    with open(output_path, 'wb') as output_pdf:
        pdf_writer.write(output_pdf)

    print(f"PDFs from {folder_path} merged into {output_path}")

# Example usage
folder_to_merge = "path/to/folder"  # Folder containing PDF files
output_pdf = "merged_output.pdf"  # Output merged PDF file
merge_pdfs_in_folder(folder_to_merge, output_pdf)

解释:

  1. 循环遍历文件夹:merge_pdfs_in_folder() 函数会遍历指定文件夹中的所有 PDF 文件。
  2. 合并 PDF:它使用 PdfReader 读取每个 PDF,并使用 PdfWriter 将页面合并到一个输出文件中。
  3. 排除最后一页:当 exclude_last_page 参数为 True 时,脚本会排除每个 PDF 的最后一页(假设它是空白页)。

3.扩展以循环遍历多个目录:

如果您想循环遍历多个目录并合并每个目录中的 PDF,您可以像这样修改代码:

def merge_pdfs_in_directories(base_directory, output_directory):
    """Merge the PDFs of each folder under ``base_directory`` separately.

    Every folder in the tree (including ``base_directory`` itself) that
    contains at least one PDF produces ``merged_<folder name>.pdf`` inside
    ``output_directory``.

    Args:
        base_directory: Root of the directory tree to walk.
        output_directory: Directory that receives the merged PDFs; it is
            created if it does not exist yet.

    Note:
        Output names are based on the folder's own name only, so two folders
        with the same name at different depths write to the same output file.
    """
    os.makedirs(output_directory, exist_ok=True)
    output_abs = os.path.abspath(output_directory)

    for root, dirs, files in os.walk(base_directory):
        # Prune the output directory from the walk (in place, as os.walk
        # requires) so earlier merge results are not merged again when the
        # output directory lives inside the base directory.
        dirs[:] = [
            d for d in dirs
            if os.path.abspath(os.path.join(root, d)) != output_abs
        ]

        # Only folders that actually contain PDFs yield an output file.
        if not any(name.lower().endswith('.pdf') for name in files):
            continue

        # normpath() strips a trailing separator so basename() is not empty.
        folder_name = os.path.basename(os.path.normpath(root))
        output_pdf = os.path.join(output_directory, f"merged_{folder_name}.pdf")
        merge_pdfs_in_folder(root, output_pdf)


# Example usage
base_dir = "path/to/main_directory"  # Base directory containing subfolders
output_dir = "path/to/output_directory"  # Where to save the merged PDFs
merge_pdfs_in_directories(base_dir, output_dir)

这将循环遍历 base_directory 里面的所有目录,并将每个目录合并后的 PDF 保存到 output_directory 中。

2024-09-20