如何使用带有 Pdfreader 类的 itextsharp 阅读 PDF 内容。我的 PDF 可能包含纯文本或文本图像。
sing iTextSharp.text.pdf; using iTextSharp.text.pdf.parser; using System.IO; public string ReadPdfFile(string fileName) { StringBuilder text = new StringBuilder(); if (File.Exists(fileName)) { PdfReader pdfReader = new PdfReader(fileName); for (int page = 1; page <= pdfReader.NumberOfPages; page++) { ITextExtractionStrategy strategy = new SimpleTextExtractionStrategy(); string currentText = PdfTextExtractor.GetTextFromPage(pdfReader, page, strategy); currentText = Encoding.UTF8.GetString(ASCIIEncoding.Convert(Encoding.Default, Encoding.UTF8, Encoding.Default.GetBytes(currentText))); text.Append(currentText); } pdfReader.Close(); } return text.ToString(); }