我们从 Python 开源项目中提取了以下 23 个代码示例，用于说明如何使用 PyPDF2.PdfFileWriter()。
def render_pdf(self):
    """Combine every entry of ``self.pages`` into one PDF and return its bytes.

    Each entry contributes exactly one page (page 0 of its PDF form).
    Entries whose ``extension`` is ``"pdf"`` are read as-is; anything else
    is treated as an image and converted to a one-page PDF in memory first.

    :return: the assembled PDF document as a byte string.
    """
    writer = PdfFileWriter()
    for entry in self.pages:
        if entry.extension != "pdf":
            # Image entry: render it as an RGB, single-page PDF in memory.
            source = BytesIO()
            picture = Image.open(BytesIO(entry.binary))
            picture.convert("RGB").save(source, format="pdf")
        else:
            # Already a PDF: read the stored bytes directly.
            source = BytesIO(entry.binary)
        writer.addPage(PdfFileReader(source).getPage(0))

    assembled = BytesIO()
    writer.write(assembled)
    return assembled.getvalue()
def pdf_page_to_png(src_pdf, pagenum=0, resolution=72):
    """Return the specified PDF page as a ``wand.image.Image`` in PNG format.

    The requested page is copied into a temporary single-page PDF held in
    memory, which is then rasterised at ``resolution`` DPI.

    :param PyPDF2.PdfFileReader src_pdf: PDF from which to take the page.
    :param int pagenum: Zero-based number of the page to take.
    :param int resolution: Resolution for the resulting PNG, in DPI.
    :return: a new image whose format is PNG.
    """
    dst_pdf = PyPDF2.PdfFileWriter()
    dst_pdf.addPage(src_pdf.getPage(pagenum))

    pdf_bytes = io.BytesIO()
    dst_pdf.write(pdf_bytes)
    pdf_bytes.seek(0)

    # ``convert`` does not modify the image in place: it returns a converted
    # copy.  Return that copy (the original code discarded it and handed back
    # the PDF-format image) and release the intermediate source image.
    with Image(file=pdf_bytes, resolution=resolution) as pdf_image:
        return pdf_image.convert("png")

# Example of converting exam.pdf located in the same directory
# convert('exam')
# NOTE : default resolution is 72 dpi
def outputpapertemplate(self, dest, listchar, output=None):
    """Render template pages for ``listchar`` and write or return the PDF.

    Pages are produced by calling ``self.outputtemplateonepage(listchar)``
    repeatedly while ``listchar`` is non-empty; the helper is presumably
    responsible for consuming entries from ``listchar`` (verify against its
    definition, otherwise this loop cannot terminate).

    :param dest: ``None`` to get the writer back, a ``str`` file path to
        write to (missing parent directories are created), or any other
        object, which is passed to ``output.write`` as a stream.
    :param listchar: sequence of characters still to be laid out.
    :param output: existing writer to append to; a new
        ``PyPDF2.PdfFileWriter`` is created when ``None``.
    :return: the writer when ``dest`` is ``None``, otherwise ``None``.
    """
    # Identity checks: ``== None`` would invoke a custom ``__eq__``.
    if output is None:
        output = PyPDF2.PdfFileWriter()

    while listchar:
        iopage = self.outputtemplateonepage(listchar)
        output.addPage(PyPDF2.PdfFileReader(iopage).getPage(0))

    if dest is None:
        return output

    if isinstance(dest, str):
        # dest is a file path
        destdir = os.path.dirname(dest)
        if destdir != '' and not os.path.isdir(destdir):
            os.makedirs(destdir)
        with open(dest, "wb") as w:
            output.write(w)
    else:
        # dest is an already opened stream (io.IOBase)
        output.write(dest)
def pdf_splitter(self):
    """Split ``self.pdf_file`` into one PDF per page and convert each one.

    For a source named ``doc.pdf`` the pages are written next to it as
    ``doc_1.pdf``, ``doc_2.pdf``, ... and every written file is passed to
    ``self.pdf_to_image``.  ``self.total_pages`` is set to the page count.
    """
    # Imported locally so the block does not depend on a file-level import.
    import os

    self.log.info('Called pdf_splitter')

    # Split off only the real extension.  ``str.replace('.pdf', ...)``
    # rewrote every '.pdf' occurrence in the path and, when the suffix was
    # spelled differently (e.g. '.PDF'), produced the input path itself.
    base_name = os.path.splitext(self.pdf_file)[0]

    # ``open`` replaces the Python-2-only ``file`` builtin; the ``with``
    # block keeps the source open while pages are read and always closes it.
    with open(self.pdf_file, 'rb') as source:
        input_pdf = PdfFileReader(source)
        self.total_pages = input_pdf.numPages

        for page_number in range(self.total_pages):
            output = PdfFileWriter()
            output.addPage(input_pdf.getPage(page_number))

            # new filename
            new_pdf = '%s_%s%s' % (base_name, str(page_number + 1), '.pdf')
            with open(new_pdf, 'wb') as file_stream:
                output.write(file_stream)

            # calling pdf to image conversion
            self.pdf_to_image(new_pdf)
def pdf_page_to_png(src_pdf, pagenum=0, resolution=154):
    """Return the specified PDF page as a ``wand.image.Image`` in PNG format.

    :param PyPDF2.PdfFileReader src_pdf: PDF from which to take the page.
    :param int pagenum: Zero-based number of the page to take.
    :param int resolution: Resolution for the resulting PNG, in DPI.
    :return: a new image whose format is PNG.
    """
    check_dependencies(__optional_dependencies__['pdf'])
    # Import libraries within this function so as to avoid import-time dependence
    import PyPDF2
    from wand.image import Image  # TODO: When we start using this again, document which system-level libraries are required.

    dst_pdf = PyPDF2.PdfFileWriter()
    dst_pdf.addPage(src_pdf.getPage(pagenum))

    pdf_bytes = io.BytesIO()
    dst_pdf.write(pdf_bytes)
    pdf_bytes.seek(0)

    # ``convert`` returns a converted copy and leaves the source untouched,
    # so the copy is what must be returned; the PDF-format source image is
    # released when the ``with`` block exits.
    with Image(file=pdf_bytes, resolution=resolution) as pdf_image:
        return pdf_image.convert("png")
def get_pages(pdf_filename, from_, to):
    """Extract an inclusive, 1-based page range from a PDF file.

    :param pdf_filename: path of the source PDF.
    :param from_: number of the first page to extract (1-based).
    :param to: number of the last page to extract (1-based, inclusive);
        values smaller than ``from_`` are raised to ``from_`` so that a
        single page is returned.
    :return: the bytes of a new PDF containing the selected pages.
    """
    if to < from_:
        to = from_

    output = PdfFileWriter()
    stream = BytesIO()
    # The source must stay open until the writer has been serialised; the
    # ``with`` block also closes it when a page lookup or the write fails.
    with open(pdf_filename, 'rb') as pdf_file:
        pdf = PdfFileReader(pdf_file)
        for index in range(from_ - 1, to):
            output.addPage(pdf.getPage(index))
        output.write(stream)

    return stream.getvalue()
def slice(self, pdf_file_path, page_actions, final_pdf_path):
    """
    Create a new PDF from a selection of pages of an existing PDF.

    :param pdf_file_path: path of the source PDF document, from which a new PDF file will be created.
    :param page_actions: list of tuples, each tuple containing the page number and the rotation to be
                         applied. Page numbers are 1-based (first is page 1, and so on). A non-zero
                         rotation is passed to ``rotateCounterClockwise``; a falsy rotation leaves the
                         page untouched.
    :param final_pdf_path: path handed to ``self._write_to_pdf`` for the resulting document.
    :return: None. Writes the resulting PDF file into the provided path.
    :raises Exception: when no action is given or a page number is outside ``1..number of pages``.
    """
    # Materialise once: the actions are traversed twice (validation, slicing).
    page_actions = list(page_actions)
    output = PdfFileWriter()

    with open(pdf_file_path, 'rb') as file_input:
        reader = PdfFileReader(file_input, strict=False)

        # Check page actions correspond to valid input PDF pages
        if not page_actions:
            raise Exception(u"Invalid page actions: at least one page action is required.")

        input_num_pages = reader.getNumPages()
        # A comprehension instead of ``zip(*page_actions)[0]``, which is not
        # subscriptable on Python 3.
        actions_page_numbers = [action[0] for action in page_actions]
        largest_page_num = max(actions_page_numbers)
        lowest_page_num = min(actions_page_numbers)

        if lowest_page_num < 1:
            raise Exception(u"Invalid page numbers range in actions: page numbers cannot be lower than 1.")

        # Page numbers are 1-based, so the largest valid one equals the page
        # count (the previous ``largest - 1 > count`` test let count + 1 through).
        if largest_page_num > input_num_pages:
            raise Exception(u"Invalid page numbers range in actions: page numbers cannot exceed the maximum number "
                            u"of pages of the source PDF document.")

        # Perform actual slicing + rotation
        for num_page, rotation in page_actions:
            page = reader.getPage(num_page - 1)
            if rotation:
                page = page.rotateCounterClockwise(rotation)
            output.addPage(page)

        # Written while the source is still open: the pages are read from it.
        self._write_to_pdf(output, final_pdf_path)
def save(self, filename=None, overwrite=False):
    """
    Applies the bates numbers and saves to file.

    Args:
        filename (str): Path where the PDF should be saved. Defaults to
            ``"<self.begin>.pdf"`` when empty or ``None``.
        overwrite (bool): Switch to allow overwriting of existing files.

    Returns:
        str: Path where the file was saved.

    Raises:
        FileExistsError: When the file already exists and overwrite is not enabled.
    """
    filename = filename or "{begin}.pdf".format(begin=self.begin)

    if os.path.exists(filename) and not overwrite:
        raise FileExistsError("PDF file {} already exists and overwrite is disabled.".format(filename))

    # Build the document before touching the destination: opening with "wb"
    # truncates an existing file, so a failure while applying a page must
    # not happen after that point.
    writer = PdfFileWriter()
    for page in self:
        page.apply()
        writer.addPage(page.page)

    with open(filename, "wb") as out_file:
        writer.write(out_file)

    return filename
def __init__(self, path):
    """Load the PDF at ``path`` into a fresh writer stored on ``self.writer``.

    All pages of the source are appended to the writer, and those document
    info entries whose values are text or byte strings are copied over as
    metadata; entries of any other type are skipped.

    :param path: path of the PDF file to load; also stored as ``self.path``.
    """
    self.path = path

    # NOTE(review): the handle is not closed here; presumably the appended
    # pages still read from it when the writer is written out - confirm.
    source = PdfFileReader(open(path, "rb"))

    self.writer = PdfFileWriter()
    self.writer.appendPagesFromReader(source)

    copied_info = {}
    for key, value in source.getDocumentInfo().items():
        if isinstance(value, (utils.string_type, utils.bytes_type)):
            copied_info[key] = value
    self.writer.addMetadata(copied_info)
def generate_document(self, data):
    """Render one record into ``<output_dir>/<generated filename>.pdf``.

    Each class in ``self.fields`` is instantiated with this generator, a
    canvas of ``self.width`` x ``self.height`` and the matching column of
    ``data``, then asked to ``render()`` itself.  When ``self.template_file``
    is set, the rendered page is merged onto the first page of that
    template; otherwise the rendered page is written on its own.

    :param data: indexable record with at least one column per field.
    :raises IndexError: when ``data`` has fewer columns than there are
        fields (for list-like ``data``).
    """
    template_stream = None
    if self.template_file is not None:
        template_stream = open(self.template_file, 'rb')

    try:
        template = None
        if template_stream is not None:
            template = PdfFileReader(template_stream)

        packet = StringIO()
        c = canvas.Canvas(packet, pagesize=(self.width, self.height))

        # TODO: Catch exception if there is less columns than fields
        for index, field_cls in enumerate(self.fields):
            field = field_cls(self, c, data[index])
            field.render()

        # Save canvas
        c.save()
        packet.seek(0)
        text = PdfFileReader(packet)

        if template is not None:
            # Merge text with base
            page = template.getPage(0)
            page.mergePage(text.getPage(0))
        else:
            page = text.getPage(0)

        output = PdfFileWriter()
        output.addPage(page)

        # Save file
        filename = "%s/%s.pdf" % (self.output_dir, self.generate_filename(data))
        with open(filename, 'wb') as output_stream:
            output.write(output_stream)
    finally:
        # The template has to stay open until the merged page is written;
        # close it afterwards, also when rendering or writing fails.
        if template_stream is not None:
            template_stream.close()
def add_outlines(toc, filename, output):
    """Copy the PDF ``filename`` to ``output`` with an outline built from ``toc``.

    :param toc: list of dicts; each entry is read for ``"title"`` (decoded
        as UTF-8), ``"page"`` (handed to ``make_dest``) and ``"node"``, whose
        ``real_parent``/``prev``/``next``/``first``/``last`` methods are
        called to obtain the related node or ``None``.  The list is first
        passed to ``build_outlines_btree``.
    :param filename: path of the source PDF.
    :param output: path the resulting PDF is written to.
    """
    build_outlines_btree(toc)

    # Outline dictionary key for each node accessor.
    opt_keys = {"real_parent": "/Parent", "prev": "/Prev", "next": "/Next",
                "first": "/First", "last": "/Last"}

    pdf_out = PdfFileWriter()
    # Keep the source open until the copy is written, then always close it.
    with open(filename, 'rb') as source:
        pdf_in = PdfFileReader(source)
        for p in pdf_in.pages:
            pdf_out.addPage(p)

        toc_num = len(toc)
        # References are created before the objects they point to, numbered
        # from the writer's current object count + 1: idorefs[0] is meant for
        # the outline root, the following ones for the items.
        # NOTE(review): this relies on the ``_addObject`` calls below being
        # the next objects added, in exactly this order.
        idoix = len(pdf_out._objects) + 1
        idorefs = [PDF.IndirectObject(x + idoix, 0, pdf_out)
                   for x in range(toc_num + 1)]

        ol = PDF.DictionaryObject()
        ol.update({
            PDF.NameObject("/Type"): PDF.NameObject("/Outlines"),
            PDF.NameObject("/First"): idorefs[1],
            PDF.NameObject("/Last"): idorefs[-1],
            PDF.NameObject("/Count"): PDF.NumberObject(toc_num)
        })

        olitems = []
        for t in toc:
            oli = PDF.DictionaryObject()
            oli.update({
                PDF.NameObject("/Title"): PDF.TextStringObject(t["title"].decode("utf-8")),
                PDF.NameObject("/Dest"): make_dest(pdf_out, t["page"])
            })
            for k, v in opt_keys.items():
                n = getattr(t["node"], k)()
                if n is not None:
                    oli.update({
                        PDF.NameObject(v): idorefs[n.index]
                    })
            olitems.append(oli)

        pdf_out._addObject(ol)
        for i in olitems:
            pdf_out._addObject(i)

        pdf_out._root_object.update({
            PDF.NameObject("/Outlines"): idorefs[0]
        })

        with open(output, "wb") as outputFile:
            pdf_out.write(outputFile)
def attach(self, binary):
    """Add ``binary`` to this document as one or more pages.

    The type is detected with ``puremagic.from_string``.  A multi-page PDF
    is split and every page is added separately; a single-page PDF or a
    recognised image is added unchanged; anything else is ignored.

    :param binary: raw bytes of the attachment.
    :return: the result of the last ``self.add_page`` call when it is
        truthy, otherwise ``None``.
    """
    image_extensions = [".png", ".jfif", ".gif", ".jpeg", ".jpg"]
    detected = puremagic.from_string(binary)
    added = None

    if detected == ".pdf":
        # use PyPDF2 to read the stream
        reader = PdfFileReader(BytesIO(binary))
        if reader.getNumPages() <= 1:
            # single page: keep the original bytestream
            added = self.add_page(binary)
        else:
            # multi-page: re-serialise every page as its own PDF
            for single in reader.pages:
                page_writer = PdfFileWriter()
                page_writer.addPage(single)
                page_bytes = BytesIO()
                page_writer.write(page_bytes)
                added = self.add_page(page_bytes.getvalue())
    elif detected in image_extensions:
        added = self.add_page(binary)
    # unrecognised files are silently skipped

    return added if added else None
def BurnSudoOnPdf(path, numpage, diffarray):
    """Lay out puzzle images on one A4 page and write ``output<numpage>.pdf``.

    Images are placed in two columns (x = 55 and x = 345) and three rows
    (y = 590, 320, 50), each drawn 200 x 200 with a caption from
    ``getStrDiff`` above it, followed by the header, footer and page number.

    :param path: sequence of image paths, drawn in order.
    :param numpage: page number printed on the page and used in the output
        file name.
    :param diffarray: per-image values passed to ``getStrDiff``; indexed in
        parallel with ``path``.
    """
    pdf = PdfFileWriter()

    # Using ReportLab Canvas to insert image into PDF
    imgTemp = BytesIO()
    imgDoc = canvas.Canvas(imgTemp, pagesize=A4)

    # Register every font once, up front, instead of repeating the calls.
    pdfmetrics.registerFont(TTFont('Vera', 'Vera.ttf'))
    pdfmetrics.registerFont(TTFont('VeraBd', 'VeraBd.ttf'))
    pdfmetrics.registerFont(TTFont('VeraIt', 'VeraIt.ttf'))
    #pdfmetrics.registerFont(TTFont('VeraBI', 'VeraBI.ttf'))

    # Draw image on Canvas and save PDF in buffer
    for i, image_path in enumerate(path):
        # odd positions go to the right column, even ones to the left
        x = 345 if (i + 1) % 2 == 0 else 55
        if i < 2:
            y = 590
        elif i < 4:
            y = 320
        else:
            y = 50
        imgDoc.drawImage(image_path, x, y, 200, 200)
        imgDoc.setFont('VeraIt', 9)
        imgDoc.drawString(x + 2, y + 203, getStrDiff(diffarray[i]))

    imgDoc.setFont('Vera', 13)
    imgDoc.drawString(30, 820, "BurnYourPc Organization/")
    imgDoc.setFont('VeraBd', 9)
    imgDoc.drawString(197, 820, "Sudoku Project")
    imgDoc.setFont('VeraIt', 8)
    imgDoc.drawString(430, 20, "By PantelisPanka, nikfot, TolisChal")
    imgDoc.setFont('Vera', 8)
    imgDoc.drawString(550, 820, str(numpage))
    imgDoc.save()

    # Use PyPDF to merge the image-PDF into the template
    pdf.addPage(PdfFileReader(BytesIO(imgTemp.getvalue())).getPage(0))

    # Close the output explicitly so the file is complete on return.
    with open("output" + str(numpage) + ".pdf", "wb") as out_file:
        pdf.write(out_file)
def split_pdf(fp, pagenos=None):
    """Build a writer holding the selected pages of a PDF.

    :param fp: stream (or whatever ``PdfFileReader`` accepts) of the source
        PDF.  It is not closed here.
    :param pagenos: iterable of page indices as understood by
        ``PdfFileReader.getPage``; duplicates are dropped and the pages are
        added in ascending order.  ``None`` (the default) selects no pages.
    :return: a ``PdfFileWriter`` containing the selected pages.
    """
    # Remove any possible duplicate pages.  Sorting makes the page order
    # deterministic; iterating a bare ``set`` gives no ordering guarantee.
    unique_pagenos = sorted(set(pagenos or ()))

    # Create the pdf reader
    inputpdf = PdfFileReader(fp)

    # Create the pdf writer
    output = PdfFileWriter()

    # Add each requested page to the writer
    for pageno in unique_pagenos:
        output.addPage(inputpdf.getPage(pageno))

    return output

# Gets all top level sections from the PDF
def postprocess_pdf(input_pdf, qr_data, qr_x=545, qr_y=20, version=None):
    """ PDF post-processor. Stamps a QR code onto each PDF page.

    :param input_pdf: PDF byte content
    :param qr_data: QR code data
    :param qr_x: X position of the QR image
    :param qr_y: Y position of the QR image
    :param version: passed through to ``pyqrcode.create``
    :return: byte content of the stamped PDF
    """
    # Render the QR code as EPS text, then re-encode it as bytes.
    eps_text = StringIO()
    pyqrcode.create(qr_data, version=version).eps(eps_text)
    eps_bytes = bytes(eps_text.getvalue(), 'utf-8')

    # Convert the EPS into a one-page PDF that can be merged onto pages.
    stamp_pdf = BytesIO()
    stamp_img = Image(file=BytesIO(eps_bytes))
    stamp_img.format = 'pdf'
    stamp_img.save(stamp_pdf)
    stamp_page = PdfFileReader(stamp_pdf).getPage(0)

    stamped = PdfFileWriter()
    for source_page in PdfFileReader(BytesIO(input_pdf)).pages:
        source_page.mergeTranslatedPage(stamp_page, qr_x, qr_y)
        stamped.addPage(source_page)

    result = BytesIO()
    stamped.write(result)
    return result.getvalue()
def merge_pdfs(self, final_pdf_path, actions, append_blank_page=True):
    """
    Merge several PDF files into one single PDF.

    :param final_pdf_path: file path to save the merged pdf. An action whose path equals this
                           one is skipped, as are actions with an empty path.
    :param actions: list of tuples, each tuple containing a PDF file path and the degrees of
                    counterclockwise rotation to perform on every page of that document.
    :param append_blank_page: append a blank page after each merged document if True.
    :return: None. The result is written through ``self._write_to_pdf``.
    :raises DocumentClipperError: when a source document cannot be opened or parsed.
    """
    output = PdfFileWriter()
    docs_to_close = []

    try:
        for pdf_file_path, rotation in actions:
            if not pdf_file_path or pdf_file_path == final_pdf_path:
                continue

            logging.info(u"Parse '%s'" % pdf_file_path)

            try:
                document_file = open(pdf_file_path, 'rb')
                # Track the handle immediately so it is closed even when
                # the reader cannot parse the file.
                docs_to_close.append(document_file)
                document = PdfFileReader(document_file, strict=False)
                num_pages = document.getNumPages()
            except Exception as exc:
                logging.exception("Error merging pdf %s: %s" % (pdf_file_path, str(exc)))
                raise DocumentClipperError

            # Rotation must be performed per page, not per document
            for num_page in range(num_pages):
                page = document.getPage(num_page)
                if rotation:
                    # A falsy rotation (0 or None) leaves the page as it is.
                    page = page.rotateCounterClockwise(rotation)
                output.addPage(page)

            if append_blank_page:
                output.addBlankPage()

        # The sources must still be open here: pages are read from them.
        self._write_to_pdf(output, final_pdf_path)
    finally:
        # Release every opened source, also when merging or writing failed.
        self._close_files(docs_to_close)
def main():
    """Generate a certificate PDF and save it to ``SAVE_AS``.

    The text rendered from ``TEXT_PATH`` is laid out as justified
    paragraphs on a landscape A4 page, which is then merged onto the first
    page of ``template.pdf`` (looked up in the current directory).
    """
    context = {
        'instructor': INSTRUCTOR,
        'course': COURSE,
        'student': STUDENT,
        'hours': HOURS,
    }

    styles = getSampleStyleSheet()
    styles.add(
        ParagraphStyle(
            name='Justify',
            alignment=TA_JUSTIFY,
            fontSize=16,
            leading=22
        )
    )

    certificate_txt = render(TEXT_PATH, context)

    # Leading spacer, then every paragraph followed by a small gap.
    elements = [Spacer(1, 50)]
    for p in certificate_txt.split(os.linesep):
        elements.append(Paragraph(p, styles['Justify']))
        elements.append(Spacer(1, 16))

    # Context managers close (and, for the temporary file, delete) every
    # file once the certificate has been written, even on failure.
    with tempfile.NamedTemporaryFile() as certificate:
        doc = SimpleDocTemplate(certificate, topMargin=3 * cm, bottomMargin=0)
        doc.pagesize = landscape(A4)
        doc.build(elements)

        with open('template.pdf', 'rb') as template_file:
            page1 = PdfFileReader(template_file).getPage(0)
            content = PdfFileReader(certificate)
            page1.mergePage(content.getPage(0))

            output = PdfFileWriter()
            output.addPage(page1)

            with open(SAVE_AS, 'wb') as save_file:
                output.write(save_file)

    print('Certificado gerado com sucesso em %s' % SAVE_AS)
def read(self, path):
    """Load ``path`` and append one ``Page`` per page to ``self.pages``.

    The MIME type is guessed from the file name.  A PDF is split page by
    page; each page goes through the scratch files ``temp.pdf`` and
    ``temp.png`` in the current directory, rendered at density "300" and
    loaded as a greyscale image.  A file whose type is listed in
    ``acceptable_mime`` is treated as a single page.

    Sets ``filename``, ``file_basename``, ``file_extension``, ``path``,
    ``mime_type``, ``file_basepath`` and ``num_pages``; ``prepared`` is set
    to True after a PDF was processed completely.

    :param path: path of the PDF or image file to read.
    :raises FileNotAcceptedException: for any other MIME type.
    """
    self.filename = os.path.basename(path)
    self.file_basename, self.file_extension = os.path.splitext(self.filename)
    self.path = path
    self.mime_type = mimetypes.guess_type(path)
    self.file_basepath = os.path.dirname(path)

    # If the file is a pdf, split the pdf and prep the pages.
    if self.mime_type[0] == "application/pdf":
        scratch_pdf = 'temp.pdf'
        scratch_png = 'temp.png'
        # The source stays open while pages are read and is always closed.
        with open(self.path, 'rb') as file_temp:
            pdf_reader = pyPdf.PdfFileReader(file_temp)
            self.num_pages = pdf_reader.numPages
            try:
                for i in range(self.num_pages):
                    output = pyPdf.PdfFileWriter()
                    output.addPage(pdf_reader.getPage(i))
                    try:
                        with open(scratch_pdf, 'wb') as f:
                            output.write(f)
                        im = PythonMagick.Image()
                        im.density("300")
                        im.read(scratch_pdf)
                        im.write(scratch_png)
                        orig_im = cv2.imread(scratch_png, 0)
                        page = Page(orig_im, i, self.lang)
                        self.pages.append(page)
                    finally:
                        # Remove the scratch files even when a page fails,
                        # without masking the original error.
                        for leftover in (scratch_pdf, scratch_png):
                            if os.path.exists(leftover):
                                os.remove(leftover)
                self.prepared = True
            except Exception as e:
                self.error = e
                raise

    # If the file is an image, think of it as a 1-page pdf.
    elif self.mime_type[0] in acceptable_mime:
        self.num_pages = 1
        im = PythonMagick.Image()
        im.density("300")
        im.read(path)
        temp_path = os.path.normpath(os.path.join(
            self.file_basepath, self.file_basename + '_temp.png'
        ))
        im.write(temp_path)
        orig_im = cv2.imread(temp_path, 0)
        os.remove(temp_path)
        # NOTE(review): unlike the PDF branch, ``self.lang`` is not passed
        # and ``self.prepared`` is not set here - confirm this is intended.
        page = Page(orig_im, 0)
        self.pages.append(page)

    # Otherwise, out of luck.
    else:
        print(self.mime_type[0])
        raise FileNotAcceptedException
def overlay_hocr_pages(self, dpi, hocr_filenames, orig_pdf_filename):
    """Merge OCR text layers onto the pages of a PDF.

    A text-only PDF is created for every ``(image, hocr)`` pair with
    ``self.overlay_hocr_page`` and merged onto the corresponding page of
    the original document.  The result is written next to the original as
    ``<basename>_ocr.pdf`` and the temporary text PDFs are removed.

    :param dpi: resolution passed to ``self.overlay_hocr_page``.
    :param hocr_filenames: list of ``(image filename, hocr filename)``
        tuples; sorted in place by the natural key of the image filename.
    :param orig_pdf_filename: path of the original PDF.
    :return: path of the OCR'ed PDF.
    """
    logging.debug("Going to overlay following files onto %s" % orig_pdf_filename)
    # Sort the hocr_filenames into natural keys!
    hocr_filenames.sort(key=lambda x: self.natural_keys(x[0]))
    logging.debug(hocr_filenames)

    pdf_dir, pdf_basename = os.path.split(orig_pdf_filename)
    basename = os.path.splitext(pdf_basename)[0]
    pdf_filename = os.path.join(pdf_dir, "%s_ocr.pdf" % (basename))

    text_pdf_filenames = []
    for img_filename, hocr_filename in hocr_filenames:
        text_pdf_filename = self.overlay_hocr_page(dpi, hocr_filename, img_filename)
        logging.info("Created temp OCR'ed pdf containing only the text as %s" % (text_pdf_filename))
        text_pdf_filenames.append(text_pdf_filename)

    writer = PdfFileWriter()
    # ``with`` closes the original and each text file even if a merge fails.
    with open(orig_pdf_filename, 'rb') as orig:
        for orig_pg, text_pg_filename in zip(self.iter_pdf_page(orig), text_pdf_filenames):
            with open(text_pg_filename, 'rb') as text_file:
                # builtin ``next`` works on Python 2 and 3 (``.next()`` is 2-only)
                text_pg = next(self.iter_pdf_page(text_file))

                orig_rotation_angle = int(orig_pg.get('/Rotate', 0))
                if orig_rotation_angle != 0:
                    logging.info("Original Rotation: %s" % orig_pg.get("/Rotate", 0))
                    # NOTE(review): both coordinates of the rotation point use
                    # the page width - confirm the height is not intended.
                    half_width = text_pg.mediaBox.getWidth() / 2
                    self.mergeRotateAroundPointPage(orig_pg, text_pg, orig_rotation_angle,
                                                    half_width, half_width)
                else:
                    orig_pg.mergePage(text_pg)

                orig_pg.compressContentStreams()
                writer.addPage(orig_pg)

                with open(pdf_filename, 'wb') as f:
                    # Flush out this page merge so we can close the text_file
                    writer.write(f)

    for fn in text_pdf_filenames:
        os.remove(fn)

    logging.info("Created OCR'ed pdf as %s" % (pdf_filename))
    return pdf_filename
def overlay_hocr_pages(self, dpi, hocr_filenames, orig_pdf_filename):
    """Merge OCR text layers onto the pages of a PDF.

    A text-only PDF is created for every ``(image, hocr)`` pair with
    ``self.overlay_hocr_page``; these are concatenated into
    ``<basename>_text.pdf``, whose pages are then merged one by one onto
    the pages of the original via ``self._get_merged_single_page``.  The
    result is written next to the original as ``<basename>_ocr.pdf`` and
    all temporary files are removed.

    :param dpi: resolution passed to ``self.overlay_hocr_page``.
    :param hocr_filenames: list of ``(image filename, hocr filename)``
        tuples; sorted in place by the natural key of the image filename.
    :param orig_pdf_filename: path of the original PDF.
    :return: path of the OCR'ed PDF.
    """
    logging.debug("Going to overlay following files onto %s" % orig_pdf_filename)
    # Sort the hocr_filenames into natural keys!
    hocr_filenames.sort(key=lambda x: self.natural_keys(x[0]))
    logging.debug(hocr_filenames)

    pdf_dir, pdf_basename = os.path.split(orig_pdf_filename)
    basename = os.path.splitext(pdf_basename)[0]
    pdf_filename = os.path.join(pdf_dir, "%s_ocr.pdf" % (basename))

    text_pdf_filenames = []
    for img_filename, hocr_filename in hocr_filenames:
        text_pdf_filename = self.overlay_hocr_page(dpi, hocr_filename, img_filename)
        logging.info("Created temp OCR'ed pdf containing only the text as %s" % (text_pdf_filename))
        text_pdf_filenames.append(text_pdf_filename)

    # Now, concatenate this text_pdfs into one single file.
    # This is a hack to save memory/running time when we have to do the actual merge with a writer
    all_text_filename = os.path.join(pdf_dir, "%s_text.pdf" % (basename))
    merger = PdfFileMerger()
    text_streams = []
    try:
        for text_pdf_filename in text_pdf_filenames:
            # ``open`` instead of the Python-2-only ``file``; the handle is
            # tracked so it can be closed once the merge is written.
            text_stream = open(text_pdf_filename, 'rb')
            text_streams.append(text_stream)
            merger.append(PdfFileReader(text_stream))
        merger.write(all_text_filename)
        merger.close()
    finally:
        # Handles left open keep the temp files locked (notably on
        # Windows) and make the removal below fail.
        for text_stream in text_streams:
            text_stream.close()
    del merger

    writer = PdfFileWriter()
    with open(orig_pdf_filename, 'rb') as orig:
        with open(all_text_filename, 'rb') as text_file:
            for orig_pg, text_pg in zip(self.iter_pdf_page(orig), self.iter_pdf_page(text_file)):
                orig_pg = self._get_merged_single_page(orig_pg, text_pg)
                writer.addPage(orig_pg)

            with open(pdf_filename, 'wb') as f:
                # Flush out this page merge so we can close the text_file
                writer.write(f)

    # Windows sometimes locks the temp text file for no reason, so we need to retry a few times to delete
    for fn in text_pdf_filenames:
        Retry(partial(os.remove, fn), tries=10, pause=3).call_with_retry()

    os.remove(all_text_filename)
    logging.info("Created OCR'ed pdf as %s" % (pdf_filename))
    return pdf_filename