Java 类com.itextpdf.text.pdf.PdfReader 实例源码

项目:presenca-vereadores-sjc    文件:PDFAttendanceParser.java   
/**
 * Extracts the text of every page of a PDF and returns it as one string.
 * <p>
 * Pages are processed in order with a {@link SimpleTextExtractionStrategy};
 * the per-page results are concatenated without any separator.
 * </p>
 *
 * @param pdfFile name of the PDF file handed to the {@link PdfReader} constructor
 * @return the concatenated text of all pages
 * @throws IllegalArgumentException if reading the file fails with an {@link IOException}
 */
private String getPdfContent(String pdfFile) {
    PdfReader reader = null;
    try {
        reader = new PdfReader(pdfFile);
        StringBuilder sb = new StringBuilder();
        PdfReaderContentParser parser = new PdfReaderContentParser(reader);
        for (int i = 1; i <= reader.getNumberOfPages(); i++) {
            TextExtractionStrategy strategy = parser.processContent(i, new SimpleTextExtractionStrategy());
            sb.append(strategy.getResultantText());
        }
        return sb.toString();
    } catch (IOException e) {
        throw new IllegalArgumentException("Not able to read file " + pdfFile, e);
    } finally {
        // Release the reader on every path, including a failed extraction.
        if (reader != null) {
            reader.close();
        }
    }
}
项目:testarea-itext5    文件:SwitchPageCanvas.java   
/**
 * <a href="http://stackoverflow.com/questions/34394199/i-cant-rotate-my-page-from-existing-pdf">
 * I can't rotate my page from existing PDF
 * </a>
 * <p>
 * Switching between portrait and landscape like this obviously will cut off some parts of the page.
 * </p>
 * <p>
 * For each page the media box and crop box are rewritten with their x and y
 * coordinates exchanged; the manipulated document is then saved through a
 * {@link PdfStamper}.
 * </p>
 */
@Test
public void testSwitchOrientation() throws DocumentException, IOException
{
    try (InputStream resourceStream = getClass().getResourceAsStream("/mkl/testarea/itext5/extract/n2013.00849449.pdf"))
    {
        PdfReader reader = new PdfReader(resourceStream);
        try
        {
            int n = reader.getNumberOfPages();
            for (int i = 1; i <= n; i++) {
                Rectangle rect = reader.getPageSize(i);
                Rectangle crop = reader.getCropBox(i);
                PdfDictionary pageDict = reader.getPageN(i);
                // Boxes are written as [bottom, left, top, right], i.e. with x and y swapped on purpose.
                pageDict.put(PdfName.MEDIABOX, new PdfArray(new float[] {rect.getBottom(), rect.getLeft(), rect.getTop(), rect.getRight()}));
                pageDict.put(PdfName.CROPBOX, new PdfArray(new float[] {crop.getBottom(), crop.getLeft(), crop.getTop(), crop.getRight()}));
            }
            // The output stream is closed even if creating or closing the stamper fails.
            try (FileOutputStream result = new FileOutputStream(new File(RESULT_FOLDER, "n2013.00849449-switch.pdf")))
            {
                PdfStamper stamper = new PdfStamper(reader, result);
                stamper.close();
            }
        }
        finally
        {
            reader.close();
        }
    }
}
项目:testarea-itext5    文件:DenseMerging.java   
/**
 * <a href="http://stackoverflow.com/questions/28991291/how-to-remove-whitespace-on-merge">
 * How To Remove Whitespace on Merge
 * </a>
 * <p>
 * Runs the {@link PdfDenseMergeTool} on the OP's three files (header, body, footer)
 * and writes the merged document to <code>GrandizerMerge.pdf</code>.
 * </p>
 */
@Test
public void testMergeGrandizerFiles() throws DocumentException, IOException
{
    try (   InputStream header = getClass().getResourceAsStream("Header.pdf");
            InputStream body = getClass().getResourceAsStream("Body.pdf");
            InputStream footer = getClass().getResourceAsStream("Footer.pdf")    )
    {
        PdfDenseMergeTool mergeTool = new PdfDenseMergeTool(PageSize.A4, 18, 18, 5);
        PdfReader headerReader = new PdfReader(header);
        PdfReader bodyReader = new PdfReader(body);
        PdfReader footerReader = new PdfReader(footer);
        File target = new File(RESULT_FOLDER, "GrandizerMerge.pdf");
        try (FileOutputStream out = new FileOutputStream(target))
        {
            // Inputs are merged in the order header, body, footer.
            mergeTool.merge(out, Arrays.asList(headerReader, bodyReader, footerReader));
        }
        finally
        {
            headerReader.close();
            bodyReader.close();
            footerReader.close();
        }
    }
}
项目:pdf-renderer    文件:ImageFactory.java   
/**
 * Creates an {@link ImageInstance} for the given file and registers it in <code>instances</code>.
 * <p>
 * A file whose lower-cased name ends with ".pdf" is opened with a {@link PdfReader}
 * and its first page is imported as the image; the reader is handed to the
 * {@link ImageInstance} and is not closed here. Any other file is loaded directly
 * as an image and the instance gets a <code>null</code> reader.
 * </p>
 *
 * @param cb content byte whose writer is used to import the PDF page
 * @param file the image or PDF file to load
 * @return the newly created and registered instance
 */
public ImageInstance getImageByFile( PdfContentByte cb , File file ) throws IOException, BadElementException{
    final boolean pdfSource = file.getName().toLowerCase().endsWith( ".pdf");
    final String path = file.getAbsolutePath();

    final ImageInstance created;
    if( pdfSource ){
        PdfReader pdf = new PdfReader( path );
        PdfImportedPage firstPage = cb.getPdfWriter().getImportedPage(pdf, 1);
        created = new ImageInstance(Image.getInstance(firstPage), pdf);
    }else{
        created = new ImageInstance(Image.getInstance( path ), null);
    }

    instances.add(created);
    return created;
}
项目:Voting_2b    文件:PdfLetterTest.java   
/**
 * Reads the first page of the voter's letter PDF and checks that the e-mail
 * (second text line) and password (third text line) match the voter's data.
 * Each value is taken from the part after the ":\t" separator.
 */
@Test
public void testWritePlainLetter() {

    PdfReader reader = null;
    try {
        reader = new PdfReader("letters/" + voter.getNif() + ".pdf");
        String page = PdfTextExtractor.getTextFromPage(reader, 1);
        String[] lines = page.split("\n");
        String email = lines[1].split(":\t")[1];
        String password = lines[2].split(":\t")[1];
        assertEquals(voter.getEmail(), email);
        assertEquals(voter.getPassword(), password);

    } catch (IOException e) {
        // A missing or unreadable letter must fail the test instead of letting it pass silently.
        throw new AssertionError("Archivo no encontrado", e);
    } finally {
        if (reader != null) {
            reader.close();
        }
    }

}
项目:ephesoft    文件:PDFUtil.java   
/**
 * Closes every non-null object passed in.
 * <p>
 * The PDF objects are handled first, in the order reader, document, content byte, writer.
 * The two file streams are closed afterwards in <code>finally</code> blocks, so a failure
 * while closing one object no longer leaves the file streams open.
 * </p>
 * 
 * @param reader {@link PdfReader}
 * @param document {@link Document}
 * @param contentByte {@link PdfContentByte}
 * @param writer {@link PdfWriter}
 * @param fileInputStream {@link FileInputStream}
 * @param fileOutputStream {@link FileOutputStream}
 * @throws IOException if unable to close input or output stream
 */
private static void closePassedStream(final PdfReader reader, final Document document,
        final PdfContentByte contentByte, final PdfWriter writer,
        final FileInputStream fileInputStream, final FileOutputStream fileOutputStream) throws IOException {
    try {
        if (null != reader) {
            reader.close();
        }
        if (null != document) {
            document.close();
        }
        if (null != contentByte) {
            // NOTE(review): closePath() looks like a path-drawing operator rather than a
            // resource release - kept for backward compatibility, confirm the intent.
            contentByte.closePath();
        }
        if (null != writer) {
            writer.close();
        }
    } finally {
        try {
            if (null != fileInputStream) {
                fileInputStream.close();
            }
        } finally {
            if (null != fileOutputStream) {
                try {
                    fileOutputStream.flush();
                } finally {
                    fileOutputStream.close();
                }
            }
        }
    }
}
项目:testarea-itext5    文件:Abstract2DPdfPageSplittingTool.java   
/**
 * Splits one source page into several output pages.
 * <p>
 * The page is imported once; for every rectangle returned by
 * <code>determineSplitRectangles</code> a new page is started, the clip path is set
 * to that rectangle and the imported page is drawn shifted by the negated
 * left/bottom of the source page size.
 * </p>
 *
 * @param reader the source document
 * @param page number of the page to split
 */
void split(PdfReader reader, int page) throws IOException
{
    PdfImportedPage template = writer.getImportedPage(reader, page);
    Rectangle sourceBox = reader.getPageSize(page);

    for (Rectangle tile : determineSplitRectangles(reader, page)) {
        newPage(tile);

        // Restrict drawing to the tile before painting the whole imported page.
        PdfContentByte canvas = writer.getDirectContent();
        canvas.saveState();
        canvas.rectangle(tile.getLeft(), tile.getBottom(), tile.getWidth(), tile.getHeight());
        canvas.clip();
        canvas.newPath();

        writer.getDirectContent().addTemplate(template, -sourceBox.getLeft(), -sourceBox.getBottom());

        canvas.restoreState();
    }
}
项目:testarea-itext5    文件:PdfDenseMergeTool.java   
/**
 * Merges all given documents into one document written to the output stream.
 * <p>
 * The target document is opened, each input is merged in iteration order,
 * and the document is closed in any case, even if merging fails.
 * </p>
 *
 * @param outputStream destination of the merged PDF
 * @param inputs the source documents in merge order
 */
public void merge(OutputStream outputStream, Iterable<PdfReader> inputs) throws DocumentException, IOException
{
    try
    {
        openDocument(outputStream);
        for (PdfReader input : inputs)
        {
            merge(input);
        }
    }
    finally
    {
        closeDocument();
    }
}
项目:testarea-itext5    文件:ExtractSuperAndSubInLine.java   
/**
 * Extracts the text of a page range from a classpath PDF, prints it and stores
 * each page's text in its own UTF-8 file in the result folder.
 *
 * @param resource classpath name of the PDF; its file name part is used in the output file names
 * @param startPage first page to extract (inclusive)
 * @param endPage page at which extraction stops (exclusive)
 * @param isV2 <code>true</code> to use <code>extractV2</code> and the "-lines-v2-" file name pattern
 */
void extract(String resource, int startPage, int endPage, boolean isV2) throws IOException, DocumentException, NoSuchFieldException, SecurityException
{
    String name = new File(resource).getName();
    String target = String.format(isV2 ? "%s-lines-v2-%%s.txt" : "%s-lines-%%s.txt", name);
    try (InputStream resourceStream = getClass().getResourceAsStream(resource))
    {
        PdfReader reader = new PdfReader(resourceStream);
        try
        {
            System.out.printf("\nText by line in %s\n", name);

            for (int page = startPage; page < endPage; page++)
            {
                System.out.printf("\n   Page %s\n", page);

                String pageText = isV2 ? extractV2(reader, page) : extract(reader, page);
                Files.write(new File(RESULT_FOLDER, String.format(target, page)).toPath(), pageText.getBytes("UTF8"));
                System.out.println(pageText);
            }
        }
        finally
        {
            // The reader was never released before; close it on every path.
            reader.close();
        }
    }
}
项目:testarea-itext5    文件:TextExtraction.java   
/**
 * Extracts the text of all pages with a fresh strategy instance per page,
 * stores each page's text in a UTF-8 file and returns the combined text.
 *
 * @param reader the document to extract from
 * @param format {@link String#format} pattern for the output path; receives the page number
 * @param strategyClass strategy type, instantiated via its no-argument constructor
 * @param filters optional filters; if any are given the strategy is wrapped in a
 *        {@link FilteredTextRenderListener}
 * @return the page texts joined by two line feeds
 */
<E extends TextExtractionStrategy> String extractAndStore(PdfReader reader, String format, Class<E> strategyClass, RenderFilter... filters) throws Exception
{
    final boolean filtered = filters != null && filters.length > 0;
    StringBuilder allText = new StringBuilder();

    for (int pageNo = 1; pageNo <= reader.getNumberOfPages(); pageNo++)
    {
        TextExtractionStrategy plain = strategyClass.getConstructor().newInstance();
        TextExtractionStrategy effective = filtered ? new FilteredTextRenderListener(plain, filters) : plain;

        String text = extract(reader, pageNo, effective);
        Files.write(Paths.get(String.format(format, pageNo)), text.getBytes("UTF8"));

        // Pages after the first are separated by an empty line.
        if (pageNo > 1)
        {
            allText.append("\n\n");
        }
        allText.append(text);
    }

    return allText.toString();
}
项目:testarea-itext5    文件:RedactText.java   
/**
 * <a href="http://stackoverflow.com/questions/38278816/remove-header-of-a-pdf-using-itext-pdfcleanupprocessor-does-not-work">
 * Remove header of a pdf using iText PdfCleanUpProcessor does not work
 * </a>
 * <br/>
 * <a href="https://www.dropbox.com/s/4u8vupjqc4st3ib/love.pdf?dl=0">
 * love.pdf
 * </a>
 * <p>
 * Cannot reproduce, I get a <code>org.apache.commons.imaging.ImageReadException: Invalid marker found in entropy data</code>.
 * </p>
 * <p>
 * The same clean-up rectangle (0, 800, 1000, 1000) is registered for every page.
 * </p>
 */
@Test
public void testRedactLikeShiranSEkanayake() throws IOException, DocumentException
{
    try (   InputStream resource = getClass().getResourceAsStream("love.pdf");
            OutputStream result = new FileOutputStream(new File(OUTPUTDIR, "love-redacted.pdf")) )
    {
        PdfReader reader = new PdfReader(resource);
        try
        {
            PdfStamper stamper = new PdfStamper(reader, result);
            List<PdfCleanUpLocation> cleanUpLocations = new ArrayList<PdfCleanUpLocation>();

            for (int i = 1; i <= reader.getNumberOfPages(); i++)
            {
                cleanUpLocations.add(new PdfCleanUpLocation(i, new Rectangle(0, 800, 1000, 1000)));
            }
            PdfCleanUpProcessor cleaner = new PdfCleanUpProcessor(cleanUpLocations, stamper);
            cleaner.cleanUp();
            stamper.close();
        }
        finally
        {
            // Release the reader even when the clean-up throws.
            reader.close();
        }
    }
}
项目:testarea-itext5    文件:UpdateMetaData.java   
/**
 * <a href="http://stackoverflow.com/questions/43511558/how-to-set-attributes-for-existing-pdf-that-contains-only-images-using-java-itex">
 * how to set attributes for existing pdf that contains only images using java itext?
 * </a>
 * <p>
 * The OP indicated in a comment that he searches a solution without a second file.
 * This test shows how to work with a single file, by first loading the file into a byte array.
 * </p>
 * <p>
 * The working copy is replaced if it already exists, so the test can be run repeatedly.
 * </p>
 */
@Test
public void testChangeTitleWithoutTempFile() throws IOException, DocumentException
{
    File singleFile = new File(RESULT_FOLDER, "eg_01-singleFile.pdf");
    try (   InputStream resource = getClass().getResourceAsStream("eg_01.pdf")  )
    {
        // Without REPLACE_EXISTING a second run fails because the target already exists.
        Files.copy(resource, singleFile.toPath(), java.nio.file.StandardCopyOption.REPLACE_EXISTING);
    }

    // Read everything into memory first: opening the output stream below truncates the file.
    byte[] original = Files.readAllBytes(singleFile.toPath());

    PdfReader reader = new PdfReader(original);
    try (FileOutputStream result = new FileOutputStream(singleFile))
    {
        PdfStamper stamper = new PdfStamper(reader, result);
        Map<String, String> info = reader.getInfo();
        info.put("Title", "New title");
        info.put("CreationDate", new PdfDate().toString());
        stamper.setMoreInfo(info);
        ByteArrayOutputStream baos = new ByteArrayOutputStream();
        XmpWriter xmp = new XmpWriter(baos, info);
        xmp.close();
        stamper.setXmpMetadata(baos.toByteArray());
        stamper.close();
    }
    finally
    {
        reader.close();
    }
}
项目:testarea-itext5    文件:StampColoredText.java   
/**
 * The OP's original code transformed into Java.
 * <p>
 * Stamps the text "DRAFT " followed by the current date onto page 1,
 * centered on the rotated page size, at an angle of 45 in 24 point bold Helvetica.
 * </p>
 *
 * @param source the PDF to stamp
 * @param target receives the stamped PDF
 */
void stampTextOriginal(InputStream source, OutputStream target) throws DocumentException, IOException
{
    Date now = new Date();
    PdfReader pdf = new PdfReader(source);
    PdfStamper pdfStamper = new PdfStamper(pdf, target);
    BaseFont font = BaseFont.createFont(BaseFont.HELVETICA_BOLD, BaseFont.WINANSI, BaseFont.EMBEDDED);

    final int fontSize = 24;
    final int rotation = 45;
    String label = "DRAFT " + now;

    // Text anchor: the middle of the first page, taking page rotation into account.
    float centerY = pdf.getPageSizeWithRotation(1).getHeight()/2;
    float centerX = pdf.getPageSizeWithRotation(1).getWidth()/2;

    PdfContentByte overContent = pdfStamper.getOverContent(1);
    overContent.setColorFill(new BaseColor(255,200,200));
    overContent.setFontAndSize(font, fontSize);
    overContent.beginText();
    overContent.showTextAligned(Element.ALIGN_CENTER, label, centerX, centerY, rotation);
    overContent.endText();

    pdfStamper.close();
    pdf.close();
}
项目:testarea-itext5    文件:DividerAndColorAwareTextExtraction.java   
/**
 * Extracts the sections of the pages <code>from</code> to <code>to</code> (both inclusive),
 * stores each section in its own UTF-8 file and returns all section texts combined.
 *
 * @param reader the document to extract from
 * @param format {@link String#format} pattern for the output path; receives the page
 *        number and the zero-based section index
 * @param from first page to process
 * @param to last page to process
 * @param headerColor color passed to the extraction strategy
 * @return every section prefixed by a "--" line, with an empty line after each page
 */
String extractAndStore(PdfReader reader, String format, int from, int to, BaseColor headerColor) throws IOException
{
    StringBuilder result = new StringBuilder();

    for (int pageNo = from; pageNo <= to; pageNo++)
    {
        PdfReaderContentParser contentParser = new PdfReaderContentParser(reader);
        DividerAwareTextExtrationStrategy extraction = contentParser.processContent(pageNo, new DividerAndColorAwareTextExtractionStrategy(810, 30, 20, 575, headerColor));

        int sectionIndex = 0;
        for (Section current : extraction.getSections())
        {
            String text = extraction.getResultantText(current);
            Files.write(Paths.get(String.format(format, pageNo, sectionIndex)), text.getBytes("UTF8"));

            result.append("--\n");
            result.append(text);
            result.append('\n');
            sectionIndex++;
        }
        result.append("\n\n");
    }

    return result.toString();
}
项目:testarea-itext5    文件:CreateSignature.java   
/**
 * Signs <code>50m.pdf</code> with a detached CMS signature (SHA512 digest, "BC" provider)
 * and writes the result to <code>50m-signedBrunoPartialAppend.pdf</code> in the result folder.
 * The visible signature field "sig" is placed on page 1.
 */
@Test
public void sign50MBrunoPartialAppend() throws IOException, DocumentException, GeneralSecurityException
{
    String filepath = "src/test/resources/mkl/testarea/itext5/signature/50m.pdf";
    String digestAlgorithm = "SHA512";
    CryptoStandard subfilter = CryptoStandard.CMS;

    // Creating the reader and the stamper
    PdfReader reader = new PdfReader(filepath, null, true);
    try (FileOutputStream os = new FileOutputStream(new File(RESULT_FOLDER, "50m-signedBrunoPartialAppend.pdf")))
    {
        PdfStamper stamper =
            PdfStamper.createSignature(reader, os, '\0', RESULT_FOLDER, true);
        // Creating the appearance
        PdfSignatureAppearance appearance = stamper.getSignatureAppearance();
        appearance.setReason("reason");
        appearance.setLocation("location");
        appearance.setVisibleSignature(new Rectangle(36, 748, 144, 780), 1, "sig");
        // Creating the signature
        ExternalSignature pks = new PrivateKeySignature(pk, digestAlgorithm, "BC");
        ExternalDigest digest = new BouncyCastleDigest();
        MakeSignature.signDetached(appearance, digest, pks, chain,
            null, null, null, 0, subfilter);
    }
    finally
    {
        // Release the source file even if signing fails.
        reader.close();
    }
}
项目:testarea-itext5    文件:StampUnicodeText.java   
/**
 * <a href="http://stackoverflow.com/questions/35082653/adobe-reader-cant-display-unicode-font-of-pdf-added-with-itext">
 * Adobe Reader can't display unicode font of pdf added with iText
 * </a>
 * <br/>
 * <a href="https://www.dropbox.com/s/erkv9wot9d460dg/sampleOriginal.pdf?dl=0">
 * sampleOriginal.pdf
 * </a>
 * <p>
 * Indeed, just like in the iTextSharp version of the code, the resulting file has
 * issues in Adobe Reader. With a different starting file, though, it doesn't, cf.
 * {@link #testAddUnicodeStampEg_01()}.
 * </p>
 * <p>
 * As it eventually turns out, Adobe Reader treats PDF files with composite fonts
 * differently if they claim to be PDF-1.2 like the OP's sample file.
 * </p>
 */
@Test
public void testAddUnicodeStampSampleOriginal() throws DocumentException, IOException
{
    try (   InputStream resource = getClass().getResourceAsStream("sampleOriginal.pdf");
            OutputStream result = new FileOutputStream(new File(RESULT_FOLDER, "sampleOriginal-unicodeStamp.pdf"))  )
    {
        PdfReader reader = new PdfReader(resource);
        try
        {
            PdfStamper stamper = new PdfStamper(reader, result);
            BaseFont bf = BaseFont.createFont("c:/windows/fonts/arialuni.ttf", BaseFont.IDENTITY_H, BaseFont.EMBEDDED);
            PdfContentByte cb = stamper.getOverContent(1);

            Phrase p = new Phrase();
            p.setFont(new Font(bf, 25, Font.NORMAL, BaseColor.BLUE));
            p.add("Sample Text");

            ColumnText.showTextAligned(cb, PdfContentByte.ALIGN_LEFT, p, 200, 200, 0);

            stamper.close();
        }
        finally
        {
            // The reader was never closed before; release it like the sibling tests do.
            reader.close();
        }
    }
}
项目:testarea-itext5    文件:CreateSignature.java   
/**
 * Certifies <code>2g-fix.pdf</code> (no changes allowed) with a detached CMS signature
 * (SHA512 digest, "BC" provider) and writes the result to <code>2g-fix-certified.pdf</code>
 * in the result folder. The visible signature field "sig" is placed on page 1.
 */
@Test
public void signCertify2gFix() throws IOException, DocumentException, GeneralSecurityException
{
    String filepath = "src/test/resources/mkl/testarea/itext5/signature/2g-fix.pdf";
    String digestAlgorithm = "SHA512";
    CryptoStandard subfilter = CryptoStandard.CMS;

    // Creating the reader and the stamper
    PdfReader reader = new PdfReader(filepath, null, true);
    try (FileOutputStream os = new FileOutputStream(new File(RESULT_FOLDER, "2g-fix-certified.pdf")))
    {
        PdfStamper stamper =
            PdfStamper.createSignature(reader, os, '\0', RESULT_FOLDER, true);
        // Creating the appearance
        PdfSignatureAppearance appearance = stamper.getSignatureAppearance();
        appearance.setCertificationLevel(PdfSignatureAppearance.CERTIFIED_NO_CHANGES_ALLOWED);
        appearance.setReason("reason");
        appearance.setLocation("location");
        appearance.setVisibleSignature(new Rectangle(36, 748, 144, 780), 1, "sig");
        // Creating the signature
        ExternalSignature pks = new PrivateKeySignature(pk, digestAlgorithm, "BC");
        ExternalDigest digest = new BouncyCastleDigest();
        MakeSignature.signDetached(appearance, digest, pks, chain,
            null, null, null, 0, subfilter);
    }
    finally
    {
        // Release the source file even if signing fails.
        reader.close();
    }
}
项目:testarea-itext5    文件:ReadPageLabels.java   
/**
 * <a href="http://stackoverflow.com/questions/32393858/why-result-of-getpagelabels-is-different-from-the-adobe-acrobat">
 * Why result of GetPageLabels is different from the Adobe Acrobat
 * </a>
 * <br/>
 * <a href="https://drive.google.com/file/d/0Bxb0Du7de8igNmVPSUc3VzdPSjg/view?usp=sharing">
 * testHuangMeizai.pdf
 * </a>
 * <p>
 * Indeed, the labels are wrong. There is a small bug in {@link PdfPageLabels#getPageLabelFormats(PdfReader)}.
 * When encountering a new page label dictionary without a P (prefix) entry, it does not reset the current
 * prefix value.
 * </p>
 * <p>
 * The test prints every page label together with its zero-based index.
 * </p>
 */
@Test
public void testTestHuangMeizai() throws IOException
{
    try (   InputStream pdfStream = getClass().getResourceAsStream("testHuangMeizai.pdf"))
    {
        String[] labels = PdfPageLabels.getPageLabels(new PdfReader(pdfStream));

        System.out.println("page number:");
        if (labels == null)
        {
            // No labels were returned - nothing more to print.
            return;
        }
        for (int index = 0; index < labels.length; index++)
        {
            System.out.printf("%2d - %s\n", index, labels[index]);
        }
    }
}
项目:testarea-itext5    文件:VerifySignature.java   
/**
 * <a href="http://stackoverflow.com/questions/37726215/why-does-my-signature-revision-number-increment-by-2-in-itext-after-detached-s">
 * Why does my signature revision number increment by 2 (in itext) after detached signing?
 * </a>
 * <br/>
 * <a href="https://onedrive.live.com/redir?resid=2F03BFDA84B77A41!113&authkey=!ABPGZ7pxuxoE8A0&ithint=file%2Cpdf">
 * signedoutput.pdf
 * </a>
 * <p>
 * The issue cannot be reproduced. In particular the PDF contains only a single revision.
 * </p>
 * <p>
 * For every signature the test prints its name, coverage, revision, signer subject
 * and the verification result.
 * </p>
 */
@Test
public void testVerifySignedOutput() throws IOException, GeneralSecurityException
{
    System.out.println("\n\nsignedoutput.pdf\n================");

    try (   InputStream pdfStream = getClass().getResourceAsStream("signedoutput.pdf") )
    {
        AcroFields fields = new PdfReader(pdfStream).getAcroFields();

        for (String signatureName : fields.getSignatureNames()) {
           System.out.println("Signature name: " + signatureName);
           System.out.println("Signature covers whole document: " + fields.signatureCoversWholeDocument(signatureName));
           System.out.println("Document revision: " + fields.getRevision(signatureName) + " of " + fields.getTotalRevisions());
           PdfPKCS7 pkcs7 = fields.verifySignature(signatureName);
           System.out.println("Subject: " + CertificateInfo.getSubjectFields(pkcs7.getSigningCertificate()));
           System.out.println("Document verifies: " + pkcs7.verify());
        }
    }

    System.out.println();
}
项目:testarea-itext5    文件:CreateSignature.java   
/**
 * Certifies <code>2g.pdf</code> (no changes allowed) with a detached CMS signature
 * (SHA512 digest, "BC" provider) and writes the result to <code>2g-certified.pdf</code>
 * in the result folder. The visible signature field "sig" is placed on page 1.
 */
@Test
public void signCertify2g() throws IOException, DocumentException, GeneralSecurityException
{
    String filepath = "src/test/resources/mkl/testarea/itext5/signature/2g.pdf";
    String digestAlgorithm = "SHA512";
    CryptoStandard subfilter = CryptoStandard.CMS;

    // Creating the reader and the stamper
    PdfReader reader = new PdfReader(filepath, null, true);
    try (FileOutputStream os = new FileOutputStream(new File(RESULT_FOLDER, "2g-certified.pdf")))
    {
        PdfStamper stamper =
            PdfStamper.createSignature(reader, os, '\0', RESULT_FOLDER, true);
        // Creating the appearance
        PdfSignatureAppearance appearance = stamper.getSignatureAppearance();
        appearance.setCertificationLevel(PdfSignatureAppearance.CERTIFIED_NO_CHANGES_ALLOWED);
        appearance.setReason("reason");
        appearance.setLocation("location");
        appearance.setVisibleSignature(new Rectangle(36, 748, 144, 780), 1, "sig");
        // Creating the signature
        ExternalSignature pks = new PrivateKeySignature(pk, digestAlgorithm, "BC");
        ExternalDigest digest = new BouncyCastleDigest();
        MakeSignature.signDetached(appearance, digest, pks, chain,
            null, null, null, 0, subfilter);
    }
    finally
    {
        // Release the source file even if signing fails.
        reader.close();
    }
}
项目:testarea-itext5    文件:CreateSignature.java   
/**
 * Signs a PDF with a detached signature and a visible signature field "sig" on page 1.
 *
 * @param src name of the PDF to sign
 * @param dest name of the file receiving the signed PDF
 * @param chain certificate chain passed to the signing call
 * @param pk private key used for signing
 * @param digestAlgorithm digest algorithm name for the signature
 * @param provider security provider name for the signature
 * @param subfilter signature sub-filter
 * @param reason reason set on the signature appearance
 * @param location location set on the signature appearance
 */
public void C2_01_SignHelloWorld_sign(String src, String dest, Certificate[] chain, PrivateKey pk, String digestAlgorithm, String provider, CryptoStandard subfilter, String reason, String location)
        throws GeneralSecurityException, IOException, DocumentException {
    // Creating the reader and the stamper
    PdfReader reader = new PdfReader(src);
    try (FileOutputStream os = new FileOutputStream(dest)) {
        PdfStamper stamper = PdfStamper.createSignature(reader, os, '\0');
        // Creating the appearance
        PdfSignatureAppearance appearance = stamper.getSignatureAppearance();
        appearance.setReason(reason);
        appearance.setLocation(location);
        appearance.setVisibleSignature(new Rectangle(36, 748, 144, 780), 1, "sig");
        // Creating the signature
        ExternalDigest digest = new BouncyCastleDigest();
        ExternalSignature signature = new PrivateKeySignature(pk, digestAlgorithm, provider);
        MakeSignature.signDetached(appearance, digest, signature, chain, null, null, null, 0, subfilter);
    } finally {
        // Release the source document even if signing fails.
        reader.close();
    }
}
项目:testarea-itext5    文件:TextExtraction.java   
/**
 * <a href="https://stackoverflow.com/questions/45662544/itextsharp-library-does-not-extract-text-from-my-file">
 * iTextSharp library does not extract text from my file
 * </a>
 * <br/>
 * <a href="https://www.dropbox.com/s/n4ws2fhr72xaa3s/Text%20Extraction%20-%20Colddishes_C.pdf?dl=0">
 * Text Extraction - Colddishes_C.pdf
 * </a>
 * <p>
 * The PDF declarations of the Asian fonts allow derivation of a
 * character code to Unicode map only by means of their ROS
 * values. Thus, itext-asian.jar is required for text extraction. 
 * </p>
 */
@Test
public void testTextExtractionColddishesC() throws IOException, DocumentException
{
    try (InputStream resourceStream = getClass().getResourceAsStream("Text Extraction - Colddishes_C.pdf"))
    {
        PdfReader reader = new PdfReader(resourceStream);
        try
        {
            String content = extractAndStoreSimple(reader, new File(RESULT_FOLDER, "Text Extraction - Colddishes_C.%s.txt").toString());

            System.out.println("\nText Extraction - Colddishes_C.pdf\n************************");
            System.out.println(content);
            System.out.println("************************");
        }
        finally
        {
            // The reader was never closed before.
            reader.close();
        }
    }
}
项目:testarea-itext5    文件:ExtractCertifiedSchoolList.java   
/**
 * Runs the {@link CertifiedSchoolListExtractionStrategy} over every page of
 * <code>certified-school-list-9-16-2015.pdf</code>; the strategy is given two
 * UTF-8 writers targeting <code>data.txt</code> and <code>non-data.txt</code>.
 */
@Test
    public void testCertifiedSchoolList_9_16_2015() throws IOException
    {
        try (   Writer dataOut = new OutputStreamWriter(new FileOutputStream(new File(RESULT_FOLDER, "data.txt")), "UTF-8");
                Writer nonDataOut = new OutputStreamWriter(new FileOutputStream(new File(RESULT_FOLDER, "non-data.txt")), "UTF-8");
                InputStream pdfStream = getClass().getResourceAsStream("certified-school-list-9-16-2015.pdf")    )
        {
            CertifiedSchoolListExtractionStrategy listener = new CertifiedSchoolListExtractionStrategy(dataOut, nonDataOut);
            PdfReader reader = new PdfReader(pdfStream);
            PdfReaderContentParser contentParser = new PdfReaderContentParser(reader);

            // One shared strategy instance receives the content of all pages.
            int page = 1;
            while (page <= reader.getNumberOfPages())
            {
                contentParser.processContent(page, listener);
                page++;
            }
            listener.close();
        }
    }
项目:testarea-itext5    文件:ShowXfaFields.java   
/**
 * <a href="https://stackoverflow.com/questions/46730760/no-fields-were-printed-on-console-after-verifying-if-form-is-using-acroform-or-x">
 * No fields were printed on console after verifying if form is using Acroform or XFA technology?
 * </a>
 * <br/>
 * <a href="http://blogs.adobe.com/formfeed/files/formfeed/Samples/multiview.pdf">
 * multiview.pdf
 * </a>
 * from
 * <a href="http://blogs.adobe.com/formfeed/2011/02/multiple-top-level-subforms.html">
 * Multiple Top Level Subforms
 * </a>
 * <p>
 * The OP's observation can be reproduced using this sample PDF.
 * </p>
 * <p>
 * The test prints whether the form is XFA or AcroForm, followed by all field names.
 * </p>
 */
@Test
public void testReadFieldsFromMultiview() throws IOException {
    try (   InputStream resource = getClass().getResourceAsStream("multiview.pdf")  ) {
        PdfReader reader = new PdfReader(resource);
        try {
            AcroFields form = reader.getAcroFields();
            XfaForm xfa = form.getXfa();
            System.out.println(xfa.isXfaPresent() ? "XFA form" : "AcroForm");
            Set<String> fields = form.getFields().keySet();
            for (String key : fields) {
                System.out.println(key);
            }
            // Only flush: closing System.out would silence all later output in this JVM.
            System.out.flush();
        } finally {
            reader.close();
        }
    }
}
项目:testarea-itext5    文件:TextExtraction.java   
/**
 * <a href="http://stackoverflow.com/questions/35344982/itext-extracted-text-from-pdf-file-using-locationtextextractionstrategy-is-in-w">
 * iText: Extracted text from pdf file using LocationTextExtractionStrategy is in wrong order
 * </a>
 * <br/>
 * <a href="https://www.dropbox.com/s/kl2s6038u51gx2q/location_text_extraction_test.pdf?dl=0">
 * location_text_extraction_test.pdf
 * </a>
 * <p>
 * Indeed, the {@link LocationTextExtractionStrategy} returns the headers in the wrong order.
 * This is due to slightly different y coordinates of them.
 * </p>
 * <p>
 * The {@link HorizontalTextExtractionStrategy2} returns the headers and actually the whole table
 * correctly. Unfortunately it fails where there are overlapping lines in side-by-side columns,
 * in this case e.g. for the invoice recipient address.
 * </p>
 */
@Test
public void testLocation_text_extraction_test() throws Exception
{
    try (InputStream resourceStream = getClass().getResourceAsStream("location_text_extraction_test.pdf"))
    {
        PdfReader reader = new PdfReader(resourceStream);
        try
        {
            // NOTE(review): both calls use the same output format, so the second run
            // presumably overwrites the files stored by the first - confirm this is intended.
            String content = extractAndStore(reader, new File(RESULT_FOLDER, "location_text_extraction_test.%s.txt").toString());
            String horizontalContent = extractAndStore(reader, new File(RESULT_FOLDER, "location_text_extraction_test.%s.txt").toString(), HorizontalTextExtractionStrategy2.class);

            System.out.println("\nText (location strategy) location_text_extraction_test.pdf \n************************");
            System.out.println(content);
            System.out.println("\nText (horizontal strategy) location_text_extraction_test.pdf \n************************");
            System.out.println(horizontalContent);
            System.out.println("************************");
        }
        finally
        {
            // The reader was never closed before.
            reader.close();
        }
    }
}
项目:testarea-itext5    文件:SimpleRedactionTest.java   
/**
 * Builds a two-page PDF in memory from the first page of the document returned by
 * <code>createSimpleTextPdf()</code>.
 * <p>
 * Page one shows the imported page transformed by the matrix
 * (.7, .7, -.7, .7, 400, -200); page two shows it placed at the left/bottom
 * of the source page size.
 * </p>
 *
 * @return the bytes of the generated PDF
 */
static byte[] createRotatedIndirectTextPdf() throws DocumentException, IOException
{
    ByteArrayOutputStream buffer = new ByteArrayOutputStream();
    Document doc = new Document();
    PdfWriter pdfWriter = PdfWriter.getInstance(doc, buffer);
    doc.open();

    PdfReader source = new PdfReader(createSimpleTextPdf());
    PdfImportedPage importedPage = pdfWriter.getImportedPage(source, 1);
    Rectangle sourceBox = source.getPageSize(1);

    // First page: imported content under a transformation matrix.
    pdfWriter.getDirectContent().addTemplate(importedPage, .7f, .7f, -.7f, .7f, 400, -200);

    // Second page: imported content with a plain offset.
    doc.newPage();
    pdfWriter.getDirectContent().addTemplate(importedPage, sourceBox.getLeft(), sourceBox.getBottom());

    doc.close();
    return buffer.toByteArray();
}
项目:testarea-itext5    文件:CreateEllipse.java   
/**
 * <a href="http://stackoverflow.com/questions/43205385/trying-to-draw-an-ellipse-annotation-and-the-border-on-the-edges-goes-thin-and-t">
 * Trying to draw an ellipse annotation and the border on the edges goes thin and thik when i try to roatate pdf itext5
 * </a>
 * <p>
 * This test creates an ellipse annotation without appearance on a page with rotation.
 * The ellipse form looks ok but it is moved to the right of the actual appearance rectangle when viewed in Adobe Reader.
 * This is caused by iText creating a non-standard rectangle, the lower left not being the lower left etc.
 * </p>
 * @see #testCreateEllipse()
 * @see #testCreateEllipseAppearance()
 * @see #testCreateEllipseAppearanceOnRotated()
 * @see #testCreateCorrectEllipseAppearanceOnRotated()
 */
@Test
public void testCreateEllipseOnRotated() throws IOException, DocumentException
{
    try (   InputStream source = getClass().getResourceAsStream("/mkl/testarea/itext5/merge/testA4.pdf");
            OutputStream target = new FileOutputStream(new File(RESULT_FOLDER, "testA4-rotated-ellipse.pdf"))    )
    {
        PdfReader pdfReader = new PdfReader(source);
        // Rotate the first page by 90 degrees before stamping.
        pdfReader.getPageN(1).put(PdfName.ROTATE, new PdfNumber(90));

        PdfStamper pdfStamper = new PdfStamper(pdfReader, target);

        Rectangle ellipseBox = new Rectangle(202 + 6f, 300, 200 + 100, 300 + 150);

        PdfAnnotation ellipse = PdfAnnotation.createSquareCircle(pdfStamper.getWriter(), ellipseBox, null, false);
        ellipse.setFlags(PdfAnnotation.FLAGS_PRINT);
        ellipse.setColor(BaseColor.RED);
        ellipse.setBorderStyle(new PdfBorderDictionary(3.5f, PdfBorderDictionary.STYLE_SOLID));

        pdfStamper.addAnnotation(ellipse, 1);

        pdfStamper.close();
        pdfReader.close();
    }
}
项目:testarea-itext5    文件:TextExtraction.java   
/**
 * <a href="http://itext.2136553.n4.nabble.com/iText-help-resources-tt4660980.html">
 * [iText-questions] iText help resources?
 * </a>
 * <br/>
 * <a href="http://itext.2136553.n4.nabble.com/attachment/4660980/0/testin.pdf">
 * testin.pdf
 * </a>
 * <p>
 * Indeed, the tables cannot be extracted. Further analysis shows that the text
 * in the tables uses type 3 fonts with an ad-hoc encoding missing any mapping
 * to Unicode.
 * </p>
 */
@Test
public void testTestin() throws IOException, DocumentException
{
    try (InputStream resourceStream = getClass().getResourceAsStream("testin.pdf"))
    {
        PdfReader reader = new PdfReader(resourceStream);
        try
        {
            String content = extractAndStoreSimple(reader, new File(RESULT_FOLDER, "testin.%s.txt").toString());

            System.out.println("\nText testin.pdf\n************************");
            System.out.println(content);
            System.out.println("************************");
        }
        finally
        {
            // The reader was never closed before.
            reader.close();
        }
    }
}
项目:testarea-itext5    文件:SplitPages.java   
/**
 * <a href="https://stackoverflow.com/questions/46466747/how-to-split-a-pdf-page-in-java">
 * How to split a PDF page in java?
 * </a>
 * <p>
 * This test shows how to split the pages of a document into tiles of A6
 * size using the {@link Abstract2DPdfPageSplittingTool}.
 * </p>
 * <p>
 * Tiles are generated row by row from the top of the page downwards and from
 * left to right; a remainder of 5 units or less at the bottom or right edge
 * does not start another row or column.
 * </p>
 */
@Test
public void testSplitDocumentA6() throws IOException, DocumentException {
    try (InputStream source = getClass().getResourceAsStream("document.pdf");
            OutputStream target = new FileOutputStream(new File(RESULT_FOLDER, "document-A6.pdf"))) {
        Abstract2DPdfPageSplittingTool splitter = new Abstract2DPdfPageSplittingTool() {
            @Override
            protected Iterable<Rectangle> determineSplitRectangles(PdfReader reader, int page) {
                Rectangle tileSize = PageSize.A6;
                Rectangle pageBox = reader.getPageSize(page);
                List<Rectangle> tiles = new ArrayList<>();

                for (float top = pageBox.getTop(); top > pageBox.getBottom() + 5; top -= tileSize.getHeight()) {
                    for (float left = pageBox.getLeft(); left < pageBox.getRight() - 5; left += tileSize.getWidth()) {
                        float bottom = top - tileSize.getHeight();
                        float right = left + tileSize.getWidth();
                        tiles.add(new Rectangle(left, bottom, right, top));
                    }
                }
                return tiles;
            }
        };
        splitter.split(target, new PdfReader(source));
    }
}
项目:testarea-itext5    文件:HideContent.java   
/**
 * <a href="http://stackoverflow.com/questions/43870545/filling-a-pdf-with-itextsharp-and-then-hiding-the-base-layer">
 * Filling a PDF with iTextsharp and then hiding the base layer
 * </a>
 * <p>
 * This test shows how to cover all content using a white rectangle: for each
 * page a filled white rectangle of the page size is drawn onto the over
 * content, and the result is written to
 * <code>document-hiddenContent.pdf</code> in the result folder.
 * </p>
 *
 * @throws IOException if reading the source or writing the result fails
 * @throws DocumentException if the stamper cannot be created or closed
 */
@Test
public void testHideContenUnderRectangle() throws IOException, DocumentException
{
    try (   InputStream resource = getClass().getResourceAsStream("document.pdf");
            OutputStream result = new FileOutputStream(new File(RESULT_FOLDER, "document-hiddenContent.pdf")))
    {
        PdfReader pdfReader = new PdfReader(resource);
        try
        {
            PdfStamper pdfStamper = new PdfStamper(pdfReader, result);
            for (int page = 1; page <= pdfReader.getNumberOfPages(); page++)
            {
                Rectangle pageSize = pdfReader.getPageSize(page);
                PdfContentByte canvas = pdfStamper.getOverContent(page);
                canvas.setColorFill(BaseColor.WHITE);
                canvas.rectangle(pageSize.getLeft(), pageSize.getBottom(), pageSize.getWidth(), pageSize.getHeight());
                canvas.fill();
            }
            // closed on the success path only: closing after a failure would try to
            // write a half-edited document and could mask the original exception
            pdfStamper.close();
        }
        finally
        {
            // the reader is released in every case, success or failure
            pdfReader.close();
        }
    }
}
项目:testarea-itext5    文件:RemappedExtraction.java   
/**
 * Runs {@code extractRemapped} for every page of the reader, writes each
 * page's text to the file named by {@code String.format(format, page)}, and
 * returns all page texts joined by a blank line.
 *
 * @param reader the document to extract text from
 * @param format a format string taking the 1-based page number and yielding
 *               the path of that page's output file
 * @return the texts of all pages, separated by {@code "\n\n"}
 * @throws IOException if a page file cannot be written
 */
String extractAndStoreRemapped(PdfReader reader, String format) throws IOException, NoSuchFieldException, SecurityException
{
    StringBuilder allText = new StringBuilder();
    // empty before the first page, a blank line before every later page
    String separator = "";

    int pageNo = 1;
    while (pageNo <= reader.getNumberOfPages())
    {
        String text = extractRemapped(reader, pageNo);
        String targetName = String.format(format, pageNo);
        Files.write(Paths.get(targetName), text.getBytes("UTF8"));

        allText.append(separator).append(text);
        separator = "\n\n";
        pageNo++;
    }

    return allText.toString();
}
项目:testarea-itext5    文件:PortfolioFileExtraction.java   
/**
 * Looks up the folders dictionary inside the catalog's Collection dictionary
 * and passes it, together with {@code baseDir} and a fresh map, to
 * {@code collectFolders}; the map filled by that call is returned.
 *
 * @param reader the document whose catalog is inspected
 * @param baseDir the directory handed on to {@code collectFolders}
 * @return the map populated by {@code collectFolders}
 * @throws DocumentException if the catalog has no Collection dictionary or the
 *                           Collection has no folders dictionary
 */
static Map<Integer, File> retrieveFolders(PdfReader reader, File baseDir) throws DocumentException
{
    PdfDictionary collection = reader.getCatalog().getAsDict(PdfName.COLLECTION);
    if (collection == null)
    {
        throw new DocumentException("Document has no Collection dictionary");
    }

    PdfDictionary folders = collection.getAsDict(FOLDERS);
    if (folders == null)
    {
        throw new DocumentException("Document collection has no folders dictionary");
    }

    Map<Integer, File> collected = new HashMap<Integer, File>();
    collectFolders(collected, folders, baseDir);
    return collected;
}
项目:testarea-itext5    文件:EnlargePagePart.java   
/**
 * <a href="http://stackoverflow.com/questions/35374110/how-do-i-use-itext-to-have-a-landscaped-pdf-on-half-of-a-a4-back-to-portrait-and">
 * How do i use iText to have a landscaped PDF on half of a A4 back to portrait and full size on A4
 * </a>
 * <p>
 * This sample shows how to rotate and enlarge the upper half of an A4 page to fit into a new A4 page.
 * The first page of the source is imported as a template and added to a new A4
 * document with the matrix <code>[0 &radic;2 -&radic;2 0 top&middot;&radic;2 -left&middot;&radic;2]</code>.
 * </p>
 */
@Test
public void testRotateAndZoomUpperHalfPage() throws IOException, DocumentException
{
    try (   InputStream resource = getClass().getResourceAsStream("/mkl/testarea/itext5/extract/test.pdf");
            OutputStream result = new FileOutputStream(new File(RESULT_FOLDER, "test-upperHalf.pdf"))   )
    {
        PdfReader reader = new PdfReader(resource);
        Document document = new Document(PageSize.A4);
        PdfWriter writer = PdfWriter.getInstance(document, result);
        document.open();

        // scale factor applied together with the quarter turn
        final double scale = Math.sqrt(2);
        Rectangle sourceSize = reader.getPageSize(1);
        PdfImportedPage sourcePage = writer.getImportedPage(reader, 1);

        PdfContentByte canvas = writer.getDirectContent();
        double shiftX = sourceSize.getTop() * scale;
        double shiftY = -sourceSize.getLeft() * scale;
        canvas.addTemplate(sourcePage, 0, scale, -scale, 0, shiftX, shiftY);

        document.close();
    }
}
项目:testarea-itext5    文件:VerifySignature.java   
/**
 * <a href="https://stackoverflow.com/questions/45027712/invalid-signature-when-signing-an-existing-sigrature-field-with-cosign-sapi">
 * Invalid signature when signing an existing sigrature field with CoSign SAPI
 * </a>
 * <br/>
 * <a href="https://www.dropbox.com/s/j6eme53lleaok13/test_signed.pdf?dl=0">
 * test_signed-1.pdf
 * </a>
 * <p>
 * Validation shows verification success while both Adobe and SD DSS fail.
 * Embedded certificates have issues (emailAddress RDN is typed PrintableString
 * which is wrong - specified is IA5String - and does not even make sense as
 * there is no '@' in PrintableString), but does this explain it?
 * </p>
 * <p>
 * For every signature in the document the test prints its name, whether it
 * covers the whole document, its revision, the signer certificate's subject
 * fields, and the verification result.
 * </p>
 */
@Test
public void testVerifyTestSigned1() throws IOException, GeneralSecurityException
{
    System.out.println("\n\ntest_signed-1.pdf\n===================");

    try (InputStream resource = getClass().getResourceAsStream("test_signed-1.pdf"))
    {
        PdfReader reader = new PdfReader(resource);
        AcroFields fields = reader.getAcroFields();

        for (String signatureName : fields.getSignatureNames())
        {
            System.out.println("Signature name: " + signatureName);

            boolean coversAll = fields.signatureCoversWholeDocument(signatureName);
            System.out.println("Signature covers whole document: " + coversAll);

            String revisionInfo = fields.getRevision(signatureName) + " of " + fields.getTotalRevisions();
            System.out.println("Document revision: " + revisionInfo);

            PdfPKCS7 signature = fields.verifySignature(signatureName);
            System.out.println("Subject: " + CertificateInfo.getSubjectFields(signature.getSigningCertificate()));

            boolean valid = signature.verify();
            System.out.println("Document verifies: " + valid);
        }
    }

    System.out.println();
}
项目:testarea-itext5    文件:EditPageContent.java   
/**
 * Applies a {@code TransparentGraphicsRemover} content stream editor to every
 * page of <code>transparency.pdf</code> and writes the result to
 * <code>transparency-noTransparency.pdf</code> in the result folder.
 *
 * @throws IOException if reading the source or writing the result fails
 * @throws DocumentException if the stamper cannot be created or closed
 */
@Test
public void testRemoveTransparentGraphicsTransparency() throws IOException, DocumentException
{
    try (   InputStream resource = getClass().getResourceAsStream("transparency.pdf");
            OutputStream result = new FileOutputStream(new File(RESULT_FOLDER, "transparency-noTransparency.pdf")))
    {
        PdfReader pdfReader = new PdfReader(resource);
        try
        {
            PdfStamper pdfStamper = new PdfStamper(pdfReader, result);
            PdfContentStreamEditor editor = new TransparentGraphicsRemover();

            for (int i = 1; i <= pdfReader.getNumberOfPages(); i++)
            {
                editor.editPage(pdfStamper, i);
            }

            // closed on the success path only: closing after a failure would try to
            // write a half-edited document and could mask the original exception
            pdfStamper.close();
        }
        finally
        {
            // the reader is released in every case, success or failure
            pdfReader.close();
        }
    }
}
项目:testarea-itext5    文件:PortfolioFileExtraction.java   
/**
 * These two methods ({@link #extractAttachments(PdfReader, String)} and
 * {@link #extractAttachment(PdfReader, File, PdfString, PdfDictionary)})
 * essentially are the OP's original code posted in his question. They
 * extract files without the folder structure.
 * <p>
 * This method creates the target directory, then walks the flat
 * <code>Names</code> array of the catalog's <code>EmbeddedFiles</code> entry,
 * which holds alternating name / file specification entries, and passes each
 * pair to <code>extractAttachment</code>. Unlike the OP's code it returns
 * quietly instead of failing with a {@link NullPointerException} if the
 * catalog has no <code>Names</code> dictionary, that dictionary has no
 * <code>EmbeddedFiles</code> entry, or that entry has no direct
 * <code>Names</code> array; a dangling name without a following file
 * specification at the end of the array is ignored.
 * </p>
 *
 * @param reader the document to extract the attachments from
 * @param dir path of the directory to extract into; created if missing
 * @throws IOException declared for <code>extractAttachment</code>
 */
public static void extractAttachments(PdfReader reader, String dir) throws IOException
{
    File folder = new File(dir);
    folder.mkdirs();

    PdfDictionary root = reader.getCatalog();

    PdfDictionary names = root.getAsDict(PdfName.NAMES);
    if (names == null)
        return; // no name dictionary at all, so nothing to extract
    System.out.println("" + names.getKeys().toString());

    PdfDictionary embedded = names.getAsDict(PdfName.EMBEDDEDFILES);
    if (embedded == null)
        return; // names exist, but none of them for embedded files
    System.out.println("" + embedded.toString());

    PdfArray filespecs = embedded.getAsArray(PdfName.NAMES);
    if (filespecs == null)
        return; // no flat Names array in this entry; other layouts are not handled here

    // entries come in (name, file specification) pairs; stop before an unpaired trailing entry
    for (int i = 0; i + 1 < filespecs.size(); i += 2)
    {
        extractAttachment(reader, folder, filespecs.getAsString(i), filespecs.getAsDict(i + 1));
    }
}
项目:testarea-itext5    文件:VerifySignature.java   
/**
 * <a href="https://stackoverflow.com/questions/46346144/digital-signature-verification-with-itext-not-working">
 * Digital Signature Verification with itext not working
 * </a>
 * <br/>
 * <a href="https://drive.google.com/open?id=0B1XKjvoeoyPZWnk5bzc5T3VSQUk">
 * test_dsp.pdf
 * </a>
 * <p>
 * The issue is that the signature uses ECDSA and iText 5 does not (yet)
 * support ECDSA. "Support" here actually means that iText cannot find
 * the name ECDSA for the OID 1.2.840.10045.4.3.2 (SHA256withECDSA) to
 * build a proper algorithm name to use for verification.
 * </p>
 * <p>
 * Adding a mapping "1.2.840.10045.4.3.2" to "ECDSA" resolves the issue.
 * The test injects that mapping via reflection into the static
 * <code>algorithmNames</code> field of {@link EncryptionAlgorithms} before
 * verifying and printing the details of every signature in the document.
 * </p>
 * @see #testVerify20180115an_signed_original()
 */
@Test
public void testVerifyTestDsp() throws IOException, GeneralSecurityException, NoSuchFieldException, SecurityException, IllegalArgumentException, IllegalAccessException
{
    // register the missing OID -> algorithm name mapping in iText's static lookup table
    Field namesField = EncryptionAlgorithms.class.getDeclaredField("algorithmNames");
    namesField.setAccessible(true);
    @SuppressWarnings("unchecked")
    HashMap<String, String> oidToName = (HashMap<String, String>) namesField.get(null);
    oidToName.put("1.2.840.10045.4.3.2", "ECDSA");

    System.out.println("\n\ntest_dsp.pdf\n===================");

    try (InputStream resource = getClass().getResourceAsStream("test_dsp.pdf"))
    {
        PdfReader reader = new PdfReader(resource);
        AcroFields fields = reader.getAcroFields();

        for (String signatureName : fields.getSignatureNames())
        {
            System.out.println("Signature name: " + signatureName);

            boolean coversAll = fields.signatureCoversWholeDocument(signatureName);
            System.out.println("Signature covers whole document: " + coversAll);

            String revisionInfo = fields.getRevision(signatureName) + " of " + fields.getTotalRevisions();
            System.out.println("Document revision: " + revisionInfo);

            PdfPKCS7 signature = fields.verifySignature(signatureName);
            System.out.println("Subject: " + CertificateInfo.getSubjectFields(signature.getSigningCertificate()));

            boolean valid = signature.verify();
            System.out.println("Document verifies: " + valid);
        }
    }

    System.out.println();
}
项目:testarea-itext5    文件:TextExtraction.java   
/**
 * <a href="http://stackoverflow.com/questions/39932311/itext-java-not-parsing-text-properly-from-pdf">
 * iText java not parsing text properly from PDF
 * </a>
 * <br/>
 * <a href="https://www.dropbox.com/s/vc9it3c7856ejli/testPDF.pdf?dl=0">
 * testPDF.pdf
 * </a>
 * <p>
 * Indeed, the 1.2 is located minutely below the SUBMITTALS. The
 * {@link HorizontalTextExtractionStrategy2} can be used to fix this.
 * </p>
 * <p>
 * The test runs <code>extractAndStore</code> twice on the same reader, once
 * without an explicit strategy class and once with
 * {@link HorizontalTextExtractionStrategy2}, and prints both results to
 * standard out.
 * </p>
 *
 * @throws Exception if reading the resource or either extraction fails
 */
@Test
public void testTestPDF() throws Exception
{
    // try-with-resources skips closing a null resource, so no explicit null check is needed
    try (InputStream resourceStream = getClass().getResourceAsStream("testPDF.pdf"))
    {
        PdfReader reader = new PdfReader(resourceStream);
        try
        {
            String content = extractAndStore(reader, new File(RESULT_FOLDER, "testPDF.%s.txt").toString());
            String horizontalContent = extractAndStore(reader, new File(RESULT_FOLDER, "testPDF.HOR.%s.txt").toString(), HorizontalTextExtractionStrategy2.class);

            System.out.println("\nText (location strategy) testPDF.pdf \n************************");
            System.out.println(content);
            System.out.println("\nText (horizontal strategy) testPDF.pdf \n************************");
            System.out.println(horizontalContent);
            System.out.println("************************");
        }
        finally
        {
            // release the reader even when an extraction fails
            reader.close();
        }
    }
}
项目:testarea-itext5    文件:VeryDenseMerging.java   
/**
 * <a href="http://stackoverflow.com/questions/28991291/how-to-remove-whitespace-on-merge">
 * How To Remove Whitespace on Merge
 * </a>
 * <p>
 * Testing {@link PdfVeryDenseMergeTool} using the OP's files: the header, body,
 * and footer documents are merged, in that order, into
 * <code>GrandizerMerge-veryDense.pdf</code> in the result folder.
 * </p>
 */
@Test
public void testMergeGrandizerFiles() throws DocumentException, IOException
{
    try (InputStream headerStream = getClass().getResourceAsStream("Header.pdf");
         InputStream bodyStream = getClass().getResourceAsStream("Body.pdf");
         InputStream footerStream = getClass().getResourceAsStream("Footer.pdf"))
    {
        PdfVeryDenseMergeTool tool = new PdfVeryDenseMergeTool(PageSize.A4, 18, 18, 5);
        PdfReader headerReader = new PdfReader(headerStream);
        PdfReader bodyReader = new PdfReader(bodyStream);
        PdfReader footerReader = new PdfReader(footerStream);
        List<PdfReader> inputs = Arrays.asList(headerReader, bodyReader, footerReader);

        File target = new File(RESULT_FOLDER, "GrandizerMerge-veryDense.pdf");
        try (FileOutputStream fos = new FileOutputStream(target))
        {
            tool.merge(fos, inputs);
        }
        finally
        {
            // readers are released in merge order whether or not merging succeeded
            for (PdfReader input : inputs)
            {
                input.close();
            }
        }
    }
}
项目:testarea-itext5    文件:VeryDenseMerging.java   
/**
 * <a href="http://stackoverflow.com/questions/28991291/how-to-remove-whitespace-on-merge">
 * How To Remove Whitespace on Merge
 * </a>
 * <p>
 * Testing {@link PdfVeryDenseMergeTool} using the OP's files and a gap of 10. This was the
 * OP's gap value of choice resulting in lost lines. Cannot reproduce...
 * </p>
 * <p>
 * The header, body, and footer documents are merged, in that order, into
 * <code>GrandizerMerge-veryDense-gap10.pdf</code> in the result folder.
 * </p>
 */
@Test
public void testMergeGrandizerFilesGap10() throws DocumentException, IOException
{
    try (InputStream headerStream = getClass().getResourceAsStream("Header.pdf");
         InputStream bodyStream = getClass().getResourceAsStream("Body.pdf");
         InputStream footerStream = getClass().getResourceAsStream("Footer.pdf"))
    {
        PdfVeryDenseMergeTool tool = new PdfVeryDenseMergeTool(PageSize.A4, 18, 18, 10);
        PdfReader headerReader = new PdfReader(headerStream);
        PdfReader bodyReader = new PdfReader(bodyStream);
        PdfReader footerReader = new PdfReader(footerStream);
        List<PdfReader> inputs = Arrays.asList(headerReader, bodyReader, footerReader);

        File target = new File(RESULT_FOLDER, "GrandizerMerge-veryDense-gap10.pdf");
        try (FileOutputStream fos = new FileOutputStream(target))
        {
            tool.merge(fos, inputs);
        }
        finally
        {
            // readers are released in merge order whether or not merging succeeded
            for (PdfReader input : inputs)
            {
                input.close();
            }
        }
    }
}