使用PDFBox读取PDF
import java.io.File; import java.io.FileInputStream; import java.io.IOException; import org.pdfbox.pdfparser.PDFParser; import org.pdfbox.pdmodel.PDDocument; import org.pdfbox.util.PDFTextStripper; /** * * @author 刘毅 * @date 2010-2-24 * @ClassName ReaderForPDF.java * @Email liu_yi126@163.com * @param 读取PDF * @param */ public class ReaderForPDF { /** * 读PDF文件,使用了pdfbox开源项目 * @param fileName */ public void readPDF(String fileName) { File file = new File(fileName); FileInputStream in = null; try { in = new FileInputStream(fileName); //新建一个PDF解析器对象 PDFParser parser = new PDFParser(in); //对PDF文件进行解析 parser.parse(); //获取解析后得到的PDF文档对象 PDDocument pdfdocument = parser.getPDDocument(); //新建一个PDF文本剥离器 PDFTextStripper stripper = new PDFTextStripper(); //从PDF文档对象中剥离文本 String result = stripper.getText(pdfdocument); System.out.println("PDF文件" + file.getAbsolutePath() + "的文本内容如下:"); System.out.println(result); } catch (Exception e) { System.out.println("读取PDF文件"+ file.getAbsolutePath() + "生失败!" + e); e.printStackTrace(); } finally { if (in != null){ try { in.close(); } catch (IOException e1) { } } } } public static void main(String[] args) { ReaderForPDF pdf = new ReaderForPDF(); String fileName = "src/tempPDF.pdf"; try { pdf.readPDF(fileName); } catch (Exception e) { e.printStackTrace(); } } }