Java 类com.itextpdf.text.pdf.parser.Vector 实例源码

项目:finances    文件:PdfFieldValueExtractor.java   
public TextWalker(Iterable<Entry<Vector, String>> pdfText) throws IOException {
    for (Entry<Vector, String> entry : pdfText) {
        x = entry.getKey().get(Vector.I1);
        if (entry.getKey().get(Vector.I2) != y || isPastRightEdge(x)) {
            prefix.setLength(0);
            y = entry.getKey().get(Vector.I2);
        }
        List<ImportField> fieldCandidates = getMatches();
        if (fieldCandidates.size() == 1) {
            ImportField field = fieldCandidates.get(0);
            if (field.hasLabel(prefix.toString().trim()) && field.isValueRegion(x)) {
                appendValue(field, entry.getValue());
            }
            else if (field.isLabelRegion(x)) {
                appendPrefix(entry.getValue());
            }
        }
        else if (! fieldCandidates.isEmpty()) {
            appendPrefix(entry.getValue());
        }
    }
}
项目:testarea-itext5    文件:MarginFinder.java   
/**
 * Extends {@code textRectangle} by the area covered by an image: the four
 * corners of the unit square are transformed with the image CTM, and the
 * bounding rectangles of the two resulting edges are merged in.
 *
 * @see RenderListener#renderImage(ImageRenderInfo)
 */
public void renderImage(ImageRenderInfo renderInfo)
{
    final Matrix ctm = renderInfo.getImageCTM();

    final Vector corner00 = new Vector(0, 0, 1).cross(ctm);
    final Vector corner10 = new Vector(1, 0, 1).cross(ctm);
    final Vector corner01 = new Vector(0, 1, 1).cross(ctm);
    final Vector corner11 = new Vector(1, 1, 1).cross(ctm);

    final LineSegment firstEdge = new LineSegment(corner00, corner10);
    final LineSegment secondEdge = new LineSegment(corner01, corner11);

    if (textRectangle != null)
    {
        textRectangle.add(firstEdge.getBoundingRectange());
    }
    else
    {
        // first content seen: start the rectangle from this edge
        textRectangle = firstEdge.getBoundingRectange();
    }
    textRectangle.add(secondEdge.getBoundingRectange());
}
项目:testarea-itext5    文件:DividerAwareTextExtrationStrategy.java   
/**
 * Records the current path in {@link #lines} when it is a single line
 * (both a move-to and a line-to point were captured), the painting
 * operation is not a no-op, and the line is approximately horizontal.
 * The captured points are cleared afterwards in every case.
 *
 * @see ExtRenderListener#renderPath(PathPaintingRenderInfo)
 */
@Override
public Path renderPath(PathPaintingRenderInfo renderInfo)
{
    final boolean singleLine = moveToVector != null && lineToVector != null;
    if (singleLine && renderInfo.getOperation() != PathPaintingRenderInfo.NO_OP)
    {
        final Vector start = moveToVector.cross(renderInfo.getCtm());
        final Vector end = lineToVector.cross(renderInfo.getCtm());
        final Vector delta = end.subtract(start);
        final float dx = delta.get(Vector.I1);
        final float dy = delta.get(Vector.I2);

        // "approximately horizontal": the horizontal extent exceeds 20 times the vertical one
        if (Math.abs(20 * dy) < Math.abs(dx))
        {
            // store the segment so that it runs in the direction of non-decreasing x
            lines.add(dx >= 0 ? new LineSegment(start, end) : new LineSegment(end, start));
        }
    }

    moveToVector = null;
    lineToVector = null;
    return null;
}
项目:testarea-itext5    文件:HorizontalTextExtractionStrategy2.java   
/**
 * Derives a line number for this chunk from the position of its start
 * y coordinate among {@code textLineFinder.verticalFlips}.
 *
 * @return 0 when there are no flips or no odd-indexed flip lies above the
 *         y coordinate; otherwise a number computed from the index of the
 *         first flip that exceeds the y coordinate
 */
public int getLineNumber()
{
    final float y = getStartLocation().get(Vector.I2);
    final List<Float> flips = textLineFinder.verticalFlips;
    if (flips == null || flips.isEmpty())
    {
        return 0;
    }

    final int flipCount = flips.size();
    if (y < flips.get(0))
    {
        // below the very first flip
        return flipCount / 2 + 1;
    }

    // only the odd-indexed flips are inspected
    int index = 1;
    while (index < flipCount)
    {
        if (y < flips.get(index))
        {
            return (1 + flipCount - index) / 2;
        }
        index += 2;
    }
    return 0;
}
项目:testarea-itext5    文件:HorizontalTextExtractionStrategy.java   
/**
 * Computes the line number of this chunk by locating its start y
 * coordinate within {@code textLineFinder.verticalFlips}.
 *
 * @return 0 if no flips are available or the y coordinate is not below
 *         any odd-indexed flip; otherwise the number derived from the
 *         first flip lying above the y coordinate
 */
public int getLineNumber()
{
    final Vector start = getStartLocation();
    final float startY = start.get(Vector.I2);
    final List<Float> boundaries = textLineFinder.verticalFlips;

    final boolean noBoundaries = boundaries == null || boundaries.isEmpty();
    if (noBoundaries)
    {
        return 0;
    }

    final int total = boundaries.size();
    if (startY < boundaries.get(0))
    {
        return total / 2 + 1;
    }

    int lineNumber = 0;
    // step through the odd-indexed entries and stop at the first one above startY
    for (int odd = 1; odd < total; odd += 2)
    {
        if (startY < boundaries.get(odd))
        {
            lineNumber = (1 + total - odd) / 2;
            break;
        }
    }
    return lineNumber;
}
项目:testarea-itext5    文件:FreeSpaceFinder.java   
/**
 * Marks the area occupied by an image as used: the four reference corners
 * are transformed with the image CTM, their bounding rectangle is built,
 * and that rectangle is passed to {@code remove}.
 */
@Override
public void renderImage(ImageRenderInfo renderInfo)
{
    final Matrix ctm = renderInfo.getImageCTM();

    // seed a zero-size rectangle at the first corner, then grow it by the others
    final Vector origin = rect00.cross(ctm);
    Rectangle2D usedSpace = new Rectangle2D.Float(origin.get(Vector.I1), origin.get(Vector.I2), 0, 0);
    for (Vector corner : new Vector[] { rect01, rect10, rect11 })
    {
        final Vector transformed = corner.cross(ctm);
        usedSpace.add(transformed.get(Vector.I1), transformed.get(Vector.I2));
    }

    remove(usedSpace);
}
项目:testarea-itext5    文件:DividerAndColorAwareTextExtractionStrategy.java   
/**
 * <p>
 * Sorts {@link DividerAwareTextExtrationStrategy#lines} before delegating
 * to the parent implementation, because the additional lines come after
 * all divider lines of the same column and the list is therefore no longer
 * properly ordered.
 * </p>
 * <p>
 * Be aware that the {@link Comparator} used here is not a proper ordering:
 * start x coordinates differing by less than 2 are treated as equal, which
 * makes it non-transitive. It only works if the lines of one column start
 * at approximately the same x coordinate and that coordinate differs
 * clearly from those of other columns.
 * </p>
 */
@Override
public List<Section> getSections()
{
    Collections.sort(lines, new Comparator<LineSegment>()
    {
        @Override
        public int compare(LineSegment o1, LineSegment o2)
        {
            final Vector first = o1.getStartPoint();
            final Vector second = o2.getStartPoint();
            final float x1 = first.get(Vector.I1);
            final float x2 = second.get(Vector.I1);

            if (Math.abs(x1 - x2) < 2)
            {
                // same column: order by descending y (operands swapped on purpose)
                return Float.compare(second.get(Vector.I2), first.get(Vector.I2));
            }
            // different columns: order by ascending x
            return Float.compare(x1, x2);
        }
    });

    return super.getSections();
}
项目:testarea-itext5    文件:ExtractDrawnCheckboxes.java   
/**
 * <a href="http://stackoverflow.com/questions/40549977/reading-legacy-word-forms-checkboxes-converted-to-pdf">
 * Reading legacy Word forms checkboxes converted to PDF
 * </a>
 * <br>
 * <a href="https://www.dropbox.com/s/4z7ky3yy2yaj53i/Doc1.pdf?dl=0">
 * Doc1.pdf
 * </a>
 * <p>
 * Demonstrates extracting the drawn "checkboxes" from the sample PDF
 * supplied by the OP: for every page, the position of each detected box
 * and its checked state are printed.
 * </p>
 */
@Test
public void testExtractDoc1() throws IOException
{
    try (InputStream resource = getClass().getResourceAsStream("Doc1.pdf"))
    {
        PdfReader pdfReader = new PdfReader(resource);

        int page = 1;
        while (page <= pdfReader.getNumberOfPages())
        {
            System.out.printf("\nPage %s\n====\n", page);

            // a fresh strategy per page so boxes of different pages do not mix
            CheckBoxExtractionStrategy strategy = new CheckBoxExtractionStrategy();
            new PdfReaderContentParser(pdfReader).processContent(page, strategy);

            for (Box box : strategy.getBoxes())
            {
                Vector basePoint = box.getDiagonal().getStartPoint();
                float baseX = basePoint.get(Vector.I1);
                float baseY = basePoint.get(Vector.I2);
                String state = box.isChecked() ? "checked" : "unchecked";
                System.out.printf("at %s, %s - %s\n", baseX, baseY, state);
            }
            page++;
        }
    }
}
项目:finances    文件:TextExtractor.java   
/**
 * Command-line entry point: runs a {@code TextExtractor} over the file named
 * by the first argument and prints every extracted entry to standard output
 * as {@code <position>:<text>}.
 *
 * @param args command-line arguments; {@code args[0]} is the path of the file to read
 */
public static void main(String[] args) {
    if (args.length < 1) {
        // report the missing argument instead of failing with an index error
        System.err.println("usage: TextExtractor <file>");
        return;
    }
    // try-with-resources releases the file handle even when extraction fails
    try (FileInputStream stream = new FileInputStream(args[0])) {
        Stream<Entry<Vector, String>> text = new TextExtractor(stream).getText();
        text.forEach(entry -> System.out.println(entry.getKey().toString() + ":" + entry.getValue()));
    } catch (Exception ex) {
        ex.printStackTrace();
    }
}
项目:finances    文件:PdfFieldValueExtractorTest.java   
/**
 * Splits {@code text} at single spaces and registers each word in
 * {@code pdfTextBuilder} at successive x positions (one unit apart) on the
 * given y coordinate.
 *
 * @param text the text to add
 * @param x    the x coordinate of the first word
 * @param y    the y coordinate shared by all words
 * @return the x coordinate following the last word added
 */
private float addPdfText(String text, float x, float y) {
    String[] words = text.split(" ");
    float nextX = x;
    for (int i = 0; i < words.length; i++) {
        pdfTextBuilder.put(new Vector(nextX, y, 0f), words[i]);
        nextX += 1f;
    }
    return nextX;
}
项目:testarea-itext5    文件:MarginFinder.java   
/**
 * Collects the points described by a path construction operation,
 * transforms them with the current CTM and merges them into
 * {@code currentPathRectangle}.
 * <p>
 * For a rectangle operation the four corners are derived from the
 * x, y, width and height entries; for any other operation with segment
 * data, consecutive value pairs are taken as points.
 * </p>
 */
@Override
public void modifyPath(PathConstructionRenderInfo renderInfo)
{
    final List<Float> segment = renderInfo.getSegmentData();
    final List<Vector> corners = new ArrayList<Vector>();

    if (renderInfo.getOperation() == PathConstructionRenderInfo.RECT)
    {
        final float originX = segment.get(0);
        final float originY = segment.get(1);
        final float farX = originX + segment.get(2);
        final float farY = originY + segment.get(3);
        corners.add(new Vector(originX, originY, 1));
        corners.add(new Vector(farX, originY, 1));
        corners.add(new Vector(originX, farY, 1));
        corners.add(new Vector(farX, farY, 1));
    }
    else if (segment != null)
    {
        // pair up consecutive values; a trailing unpaired value is ignored
        for (int i = 0; i + 1 < segment.size(); i += 2)
        {
            corners.add(new Vector(segment.get(i), segment.get(i + 1), 1));
        }
    }

    for (Vector corner : corners)
    {
        final Vector transformed = corner.cross(renderInfo.getCtm());
        final Rectangle2D.Float pointRectangle =
                new Rectangle2D.Float(transformed.get(Vector.I1), transformed.get(Vector.I2), 0, 0);
        if (currentPathRectangle != null)
        {
            currentPathRectangle.add(pointRectangle);
        }
        else
        {
            currentPathRectangle = pointRectangle;
        }
    }
}
项目:testarea-itext5    文件:PageVerticalAnalyzer.java   
/**
 * Registers the vertical range covered by a text chunk: the y coordinates
 * of the start and end points of its ascent and descent lines are
 * collected, and the smallest and largest of them are passed to
 * {@code addVerticalUseSection}.
 */
@Override
public void renderText(TextRenderInfo renderInfo)
{
    final LineSegment[] boundaries = { renderInfo.getAscentLine(), renderInfo.getDescentLine() };
    final float[] yCoords = new float[2 * boundaries.length];

    int next = 0;
    for (LineSegment boundary : boundaries)
    {
        yCoords[next++] = boundary.getStartPoint().get(Vector.I2);
        yCoords[next++] = boundary.getEndPoint().get(Vector.I2);
    }

    // after sorting, the first entry is the minimum and the last the maximum
    Arrays.sort(yCoords);
    addVerticalUseSection(yCoords[0], yCoords[3]);
}
项目:testarea-itext5    文件:PageVerticalAnalyzer.java   
/**
 * Registers the vertical range covered by an image: the four corners of
 * the unit square are transformed with the image CTM, and the smallest and
 * largest resulting y coordinates are passed to
 * {@code addVerticalUseSection}.
 */
@Override
public void renderImage(ImageRenderInfo renderInfo)
{
    final Matrix ctm = renderInfo.getImageCTM();
    final float[] yCoords = new float[4];

    // index i enumerates the corners (0,0), (0,1), (1,0), (1,1)
    for (int i = 0; i < yCoords.length; i++)
    {
        final Vector corner = new Vector(i / 2, i % 2, 1).cross(ctm);
        yCoords[i] = corner.get(Vector.I2);
    }

    Arrays.sort(yCoords);
    addVerticalUseSection(yCoords[0], yCoords[3]);
}
项目:testarea-itext5    文件:FreeSpaceFinderExt.java   
/**
 * Gathers the points of a path construction operation, maps them through
 * the current CTM and grows {@code currentPathRectangle} to contain them.
 * <p>
 * A rectangle operation contributes its four corners (computed from the
 * x, y, width and height values); other operations contribute one point
 * per consecutive pair of segment data values, if segment data is present.
 * </p>
 */
@Override
public void modifyPath(PathConstructionRenderInfo renderInfo)
{
    final List<Vector> pathPoints = new ArrayList<Vector>();
    final List<Float> data = renderInfo.getSegmentData();
    final boolean isRectangle = renderInfo.getOperation() == PathConstructionRenderInfo.RECT;

    if (isRectangle)
    {
        final float x = data.get(0);
        final float y = data.get(1);
        final float w = data.get(2);
        final float h = data.get(3);
        final float[][] offsets = { { 0, 0 }, { w, 0 }, { 0, h }, { w, h } };
        for (float[] offset : offsets)
        {
            pathPoints.add(new Vector(x + offset[0], y + offset[1], 1));
        }
    }
    else if (data != null)
    {
        final int lastPairStart = data.size() - 1;
        for (int i = 0; i < lastPairStart; i += 2)
        {
            pathPoints.add(new Vector(data.get(i), data.get(i + 1), 1));
        }
    }

    for (Vector pathPoint : pathPoints)
    {
        final Vector mapped = pathPoint.cross(renderInfo.getCtm());
        // a zero-size rectangle represents the single point
        final Rectangle2D.Float pointRectangle =
                new Rectangle2D.Float(mapped.get(Vector.I1), mapped.get(Vector.I2), 0, 0);
        if (currentPathRectangle == null)
        {
            currentPathRectangle = pointRectangle;
            continue;
        }
        currentPathRectangle.add(pointRectangle);
    }
}
项目:testarea-itext5    文件:DividerAwareTextExtrationStrategy.java   
/**
 * <p>
 * This method returns a {@link List} of {@link Section} instances each representing
 * a section of the page delimited by a divider line above and/or below. The topmost
 * and bottommost sections of each text column are open at the top or the bottom,
 * implicitly delimited by the matching margin line.
 * </p>
 * <p>
 * {@link Section} implements {@link TextChunkFilter}. Thus, these section objects can be
 * used as argument of the parent class method {@link #getResultantText(TextChunkFilter)}.
 * </p>
 * <p>
 * Two consecutive lines are considered to belong to the same column if their start
 * x coordinates differ by less than 2. If no divider line was collected, the
 * returned list is empty.
 * </p>
 *
 * @return the sections in the order implied by {@link #lines}
 */
public List<Section> getSections()
{
    List<Section> result = new ArrayList<Section>();
    // TODO: Sort the array columnwise. In case of the OP's document, the lines already appear in the
    // correct order, so there was no need for sorting in the POC. 

    LineSegment previous = null;
    for (LineSegment line : lines)
    {
        if (previous == null)
        {
            // very first line: the section above it is open at the top
            result.add(new Section(null, line));
        }
        else if (Math.abs(previous.getStartPoint().get(Vector.I1) - line.getStartPoint().get(Vector.I1)) < 2) // 2 is a magic number... 
        {
            result.add(new Section(previous, line));
        }
        else
        {
            // column change: close the previous column at the bottom, open the new one at the top
            result.add(new Section(previous, null));
            result.add(new Section(null, line));
        }
        previous = line;
    }

    // The last column also needs its bottom-open section; without it the text
    // below the final divider line would not belong to any section.
    if (previous != null)
    {
        result.add(new Section(previous, null));
    }

    return result;
}
项目:testarea-itext5    文件:DividerAwareTextExtrationStrategy.java   
Section(LineSegment topLine, LineSegment bottomLine)
{
    float left, right, top, bottom;
    if (topLine != null)
    {
        this.topLine = topLine;
        top = Math.max(topLine.getStartPoint().get(Vector.I2), topLine.getEndPoint().get(Vector.I2));
        right = Math.max(topLine.getStartPoint().get(Vector.I1), topLine.getEndPoint().get(Vector.I1));
        left = Math.min(topLine.getStartPoint().get(Vector.I1), topLine.getEndPoint().get(Vector.I1));
    }
    else
    {
        top = topMargin;
        left = leftMargin;
        right = rightMargin;
    }

    if (bottomLine != null)
    {
        this.bottomLine = bottomLine;
        bottom = Math.min(bottomLine.getStartPoint().get(Vector.I2), bottomLine.getEndPoint().get(Vector.I2));
        right = Math.max(bottomLine.getStartPoint().get(Vector.I1), bottomLine.getEndPoint().get(Vector.I1));
        left = Math.min(bottomLine.getStartPoint().get(Vector.I1), bottomLine.getEndPoint().get(Vector.I1));
    }
    else
    {
        bottom = bottomMargin;
    }

    this.top = top;
    this.bottom = bottom;
    this.left = left;
    this.right = right;
}
项目:testarea-itext5    文件:DividerAwareTextExtrationStrategy.java   
/**
 * Accepts a text chunk if its start location lies within this section's
 * bounds (inclusive on all four sides).
 *
 * @param textChunk the chunk to test
 * @return {@code true} if the chunk's start point is inside the section
 */
@Override
public boolean accept(TextChunk textChunk)
{
    // TODO: This code only checks the text chunk starting point. One should take the
    // whole chunk into consideration
    final Vector start = textChunk.getStartLocation();
    final float x = start.get(Vector.I1);
    final float y = start.get(Vector.I2);

    if (x < left || x > right)
    {
        return false;
    }
    return y >= bottom && y <= top;
}
项目:testarea-itext5    文件:TextLineFinder.java   
/**
 * Reports the vertical extent of a text chunk to
 * {@code addVerticalUseSection}, using the lowest and highest y coordinate
 * among the end points of the chunk's ascent and descent lines.
 */
@Override
public void renderText(TextRenderInfo renderInfo)
{
    final LineSegment ascent = renderInfo.getAscentLine();
    final LineSegment descent = renderInfo.getDescentLine();

    final float[] ys = new float[4];
    ys[0] = ascent.getStartPoint().get(Vector.I2);
    ys[1] = ascent.getEndPoint().get(Vector.I2);
    ys[2] = descent.getStartPoint().get(Vector.I2);
    ys[3] = descent.getEndPoint().get(Vector.I2);

    // sorting puts the minimum first and the maximum last
    Arrays.sort(ys);
    final float lowest = ys[0];
    final float highest = ys[ys.length - 1];
    addVerticalUseSection(lowest, highest);
}
项目:testarea-itext5    文件:CertifiedSchoolListExtractionStrategy.java   
/**
 * Processes a text chunk depending on its fill colour.
 * <p>
 * Text filled with a {@code GrayColor} of gray value 0 is handled character
 * by character via {@link #renderCharacter(TextRenderInfo)}; any other text
 * finishes a pending entry (if a field is current) and is appended to the
 * entry builder as a whole. In debug mode a trace line is recorded for
 * either kind. The chunk counter is incremented in every case.
 * </p>
 */
@Override
public void renderText(TextRenderInfo renderInfo)
{
    try
    {
        final Vector startPoint = renderInfo.getBaseline().getStartPoint();
        final BaseColor fillColor = renderInfo.getFillColor();
        final boolean zeroGrayFill = fillColor instanceof GrayColor && ((GrayColor)fillColor).getGray() == 0;

        if (!zeroGrayFill)
        {
            if (debug)
            {
                nonData.append(String.format("%4d\t%3.3f %3.3f\t%s\n", chunk, startPoint.get(I1), startPoint.get(I2), renderInfo.getText()));
            }
            if (currentField > -1)
            {
                finishEntry();
            }
            entryBuilder.append(renderInfo.getText());
        }
        else
        {
            if (debug)
            {
                data.append(String.format("%4d\t%3.3f %3.3f\t%s\n", chunk, startPoint.get(I1), startPoint.get(I2), renderInfo.getText()));
            }
            for (TextRenderInfo characterInfo : renderInfo.getCharacterRenderInfos())
            {
                renderCharacter(characterInfo);
            }
        }
    }
    catch (IOException e)
    {
        e.printStackTrace();
    }
    finally
    {
        chunk++;
    }
}
项目:testarea-itext5    文件:CertifiedSchoolListExtractionStrategy.java   
/**
 * Appends a single character to the entry being built, switching fields
 * based on the character's x position.
 * <p>
 * While a field is current, a character inside it is simply appended, and
 * a character inside the next field advances the field counter and is
 * appended after a tab. Otherwise the entry is finished. A character in the
 * next field then starts a new entry at field 0; in any remaining case the
 * character is appended as is.
 * </p>
 *
 * @param renderInfo the render information of a single character
 * @throws IOException declared by the original signature
 */
public void renderCharacter(TextRenderInfo renderInfo) throws IOException
{
    final float x = renderInfo.getBaseline().getStartPoint().get(I1);
    final String text = renderInfo.getText();

    if (currentField > -1)
    {
        if (isInCurrentField(x))
        {
            entryBuilder.append(text);
            return;
        }
        else if (isInNextField(x))
        {
            currentField++;
            entryBuilder.append('\t').append(text);
            return;
        }
        else
        {
            // neither the current nor the next field: the entry is over
            finishEntry();
        }
    }

    if (isInNextField(x))
    {
        finishEntry();
        currentField = 0;
    }
    entryBuilder.append(text);
}
项目:testarea-itext5    文件:CheckBoxExtractionStrategy.java   
/**
 * Evaluates the path collected so far when it is actually painted.
 * <p>
 * A collected rectangle becomes a new {@code Box} built from its two
 * diagonals (corners transformed with the CTM). A collected single line is
 * offered to the most recently added box as a diagonal candidate. The
 * collected path state is cleared afterwards in every case.
 * </p>
 */
@Override
public Path renderPath(PathPaintingRenderInfo renderInfo)
{
    final boolean painted = renderInfo.getOperation() != PathPaintingRenderInfo.NO_OP;

    if (painted && rectangle != null)
    {
        final float left = rectangle.getLeft();
        final float right = rectangle.getRight();
        final float bottom = rectangle.getBottom();
        final float top = rectangle.getTop();

        final Vector a = new Vector(left, bottom, 1).cross(renderInfo.getCtm());
        final Vector b = new Vector(right, bottom, 1).cross(renderInfo.getCtm());
        final Vector c = new Vector(right, top, 1).cross(renderInfo.getCtm());
        final Vector d = new Vector(left, top, 1).cross(renderInfo.getCtm());

        // the two diagonals a-c and b-d describe the box
        boxes.add(new Box(new LineSegment(a, c), new LineSegment(b, d)));
    }

    if (painted && moveToVector != null && lineToVector != null && !boxes.isEmpty())
    {
        final Vector from = moveToVector.cross(renderInfo.getCtm());
        final Vector to = lineToVector.cross(renderInfo.getCtm());
        boxes.get(boxes.size() - 1).selectDiagonal(new LineSegment(from, to));
    }

    moveToVector = null;
    lineToVector = null;
    rectangle = null;
    return null;
}
项目:testarea-itext5    文件:HorizontalTextExtractionStrategy.java   
/**
 * Orders horizontal text chunks by line number first and by the x
 * coordinate of their start location second; comparison with any other
 * kind of chunk is delegated to the parent implementation.
 */
@Override
public int compareTo(TextChunk rhs)
{
    if (!(rhs instanceof HorizontalTextChunk))
    {
        return super.compareTo(rhs);
    }

    final HorizontalTextChunk other = (HorizontalTextChunk) rhs;
    final int byLine = Integer.compare(getLineNumber(), other.getLineNumber());
    return byLine != 0
            ? byLine
            : Float.compare(getStartLocation().get(Vector.I1), rhs.getStartLocation().get(Vector.I1));
}
项目:testarea-itext5    文件:PdfCleanUpContentChunk.java   
/**
 * Creates a text chunk, keeping only the x coordinates of its start and
 * end locations.
 *
 * @param text                  the text of the chunk
 * @param startLocation         where the text starts
 * @param endLocation           where the text ends
 * @param visible               passed on to the parent constructor
 * @param numOfStrTextBelongsTo stored as given
 */
public Text(PdfString text, Vector startLocation, Vector endLocation, boolean visible, int numOfStrTextBelongsTo) {
    super(visible);
    this.numOfStrTextBelongsTo = numOfStrTextBelongsTo;
    this.text = text;
    // Vector.I1 is the index of the x component
    this.startX = startLocation.get(Vector.I1);
    this.endX = endLocation.get(Vector.I1);
}
项目:cvia    文件:TextChunk.java   
/**
 * Creates a text chunk and precomputes the values derived from its
 * orientation: the normalised orientation vector, its angle (scaled by
 * 1000 and truncated to an int), the perpendicular distance, and the
 * start and end distances along the orientation.
 * <p>
 * A chunk whose start and end coincide has no direction of its own;
 * normalising its zero-length vector would yield NaN components. Such a
 * chunk is therefore treated as pointing along the positive x axis.
 * </p>
 *
 * @param string         the text of the chunk
 * @param startLocation  where the text starts
 * @param endLocation    where the text ends
 * @param charSpaceWidth stored as given
 * @param pageNumber     stored as given
 */
public TextChunk(String string, Vector startLocation, Vector endLocation, float charSpaceWidth, int pageNumber) {
    this.text = string;
    this.startLocation = startLocation;
    this.endLocation = endLocation;
    this.charSpaceWidth = charSpaceWidth;

    Vector direction = endLocation.subtract(startLocation);
    if (direction.length() == 0.0F) {
        // avoid dividing by zero in normalize(); fall back to a horizontal orientation
        direction = new Vector(1.0F, 0.0F, 0.0F);
    }
    this.orientationVector = direction.normalize();
    this.orientationMagnitude = (int)(Math.atan2((double)this.orientationVector.get(Vector.I2), (double)this.orientationVector.get(Vector.I1)) * 1000.0D);

    Vector origin = new Vector(0.0F, 0.0F, 1.0F);
    this.distPerpendicular = (int)startLocation.subtract(origin).cross(this.orientationVector).get(Vector.I3);
    this.distParallelStart = this.orientationVector.dot(startLocation);
    this.distParallelEnd = this.orientationVector.dot(endLocation);
    this.pageNumber = pageNumber;
}
项目:cvia    文件:EducationParserTest.java   
/**
 * Feeds a single education-related text chunk to the parser and runs
 * {@code parseAndSave}; the test passes if no exception is thrown.
 */
@Test
public void testParse() throws Exception {
    CV cv = new CV();
    parser.reset();
    parser.setCV(cv);

    Vector start = new Vector(0,0,1);
    Vector end = new Vector(1,0,1);
    TextChunk chunk = new TextChunk(
            "National University of Singapore Bachelor of Computing (Computer Science), Honours",
            start,
            end,
            0,
            1);
    parser.appendTextChunk(chunk);

    parser.parseAndSave();
}
项目:finances    文件:PdfFieldValueExtractor.java   
/**
 * Runs a {@code TextWalker} over the positioned text and returns the
 * collected field values as strings, with empty values removed.
 *
 * @param pdfText the text fragments keyed by their position
 * @return the non-empty values found for each field
 * @throws IOException declared by the original signature
 */
protected ListMultimap<ImportField, String> getFieldValues(Stream<Entry<Vector, String>> pdfText) throws IOException {
    TextWalker walker = new TextWalker(pdfText::iterator);
    ListMultimap<ImportField, String> fieldValues = transformValues(walker.fieldValues, StringBuilder::toString);
    // drop fields values for which nothing was collected
    fieldValues.values().removeIf(value -> value.isEmpty());
    return fieldValues;
}
项目:finances    文件:TextExtractor.java   
/**
 * Returns the entries of all maps in {@code pageText} as one stream, in
 * the order of the maps.
 *
 * @return a stream of position/text entries
 */
public Stream<Entry<Vector, String>> getText() {
    return pageText.stream()
            .flatMap(page -> page.entrySet().stream());
}
项目:testarea-itext5    文件:PageVerticalAnalyzer.java   
/**
 * Transforms the point {@code (x, y)} with the given matrix and returns
 * the y coordinate of the result.
 *
 * @param x the x coordinate of the point
 * @param y the y coordinate of the point
 * @param m the transformation matrix
 * @return the y coordinate after transformation
 */
float getTransformedY(float x, float y, Matrix m)
{
    final Vector point = new Vector(x, y, 1);
    final Vector transformed = point.cross(m);
    return transformed.get(Vector.I2);
}
项目:testarea-itext5    文件:CheckBoxExtractionStrategy.java   
/**
 * Tests whether two vectors are closer to each other than the given
 * tolerance.
 *
 * @param a              the first vector
 * @param b              the second vector
 * @param permissiveness the exclusive upper bound for the distance
 * @return {@code true} if the length of {@code a - b} is below the bound
 */
boolean approximatelyEquals(Vector a, Vector b, float permissiveness)
{
    final Vector difference = a.subtract(b);
    final float distance = difference.length();
    return distance < permissiveness;
}
项目:testarea-itext5    文件:LayoutTextExtractionStrategy.java   
/**
 * Returns the location at which the text starts.
 *
 * @return the start location of the text
 */
public Vector getStartLocation()
{
    return startLocation;
}
项目:testarea-itext5    文件:LayoutTextExtractionStrategy.java   
/**
 * Returns the location at which the text ends.
 *
 * @return the end location of the text
 */
public Vector getEndLocation()
{
    return endLocation;
}
项目:testarea-itext5    文件:HorizontalTextExtractionStrategy.java   
/**
 * Creates a horizontal text chunk; all arguments are passed unchanged to
 * the parent constructor.
 *
 * @param string         the text of the chunk
 * @param startLocation  where the text starts
 * @param endLocation    where the text ends
 * @param charSpaceWidth the character space width handed to the parent
 */
public HorizontalTextChunk(
        String string,
        Vector startLocation,
        Vector endLocation,
        float charSpaceWidth)
{
    super(string, startLocation, endLocation, charSpaceWidth);
}
项目:cvia    文件:TextChunk.java   
/**
 * Returns the start location this chunk was created with.
 *
 * @return the start location
 */
public Vector getStartLocation() {
    return this.startLocation;
}
项目:cvia    文件:TextChunk.java   
/**
 * Returns the end location this chunk was created with.
 *
 * @return the end location
 */
public Vector getEndLocation() {
    return this.endLocation;
}
项目:testarea-itext5    文件:HorizontalTextExtractionStrategy2.java   
/**
 * @return the start location
 */
public Vector getStartLocation()
{
    return startLocation;
}
项目:testarea-itext5    文件:HorizontalTextExtractionStrategy2.java   
/**
 * @return the end location
 */
public Vector getEndLocation()
{
    return endLocation;
}