TomRoush / PdfBox-Android

The Apache PdfBox project ported to work on Android
Apache License 2.0
1.01k stars 259 forks source link

PDF to *.docx conversion: formatting errors (font, color, line breaks) #568

Open dashugege opened 6 months ago

dashugege commented 6 months ago

try {

        // 加载PDF文档
        PDDocument pdfDocument = PDDocument.load(new File(pdfFilePath));
        PDFTextStripper pdfStripper = new PDFTextStripper();
        pdfStripper.setSortByPosition(true);
        int totalPages = pdfDocument.getNumberOfPages();

        XWPFDocument wordDocument = new XWPFDocument();
        for (int pageIndex = 0; pageIndex < totalPages; pageIndex++) {

            pdfStripper.setStartPage(pageIndex+1);
            pdfStripper.setEndPage(pageIndex+1);
            PDResources resources = pdfDocument.getPages().get(pageIndex).getResources();
            String pageText = pdfStripper.getText(pdfDocument).trim();

            if (!pageText.isEmpty()) {
                XWPFParagraph paragraph = wordDocument.createParagraph();
                XWPFRun run = paragraph.createRun();
                paragraph.setAlignment(ParagraphAlignment.LEFT);
                run.setText(pageText);
                // 设置段落的对齐方式
                paragraph.setAlignment(ParagraphAlignment.LEFT);
                if (pageText.indexOf("\n") > 0) {
                    //设置换行
                    String[] text = pageText.split("\n");
                    for (int i = 0; i < text.length; i++) {
                        if (i != 0) {
                            run.addCarriageReturn();
                        }
                        run.setText(text[i]);
                    }
                } else {
                    run.setText(pageText);
                }
            }

            for (COSName xObjectName : resources.getXObjectNames()) {
                PDXObject xObject = resources.getXObject(xObjectName);
                if (xObject instanceof PDImageXObject) {
                    PDImageXObject image = (PDImageXObject) xObject;
                    Bitmap bitmap = image.getImage();

                    // 将图片保存到临时文件
                    File tempImageFile = new File(parentPath + "/" + "convert_temp_image.png");
                    if (tempImageFile.exists()) {
                        tempImageFile.delete();
                    }
                    tempImageFile.createNewFile();
                    ByteArrayOutputStream bos = new ByteArrayOutputStream();
                    bitmap.compress(Bitmap.CompressFormat.JPEG, 75, bos);
                    byte[] bitmapdata = bos.toByteArray();

                    FileOutputStream fos = new FileOutputStream(tempImageFile);
                    fos.write(bitmapdata);
                    fos.close();

                    // 在Word文档中插入图片
                    try (InputStream is = new FileInputStream(tempImageFile)) {
                        int format = XWPFDocument.PICTURE_TYPE_JPEG;
                        String fileName = tempImageFile.getName();
                        int indent = 0;
                        XWPFParagraph imgParagraph = wordDocument.createParagraph();
                        XWPFRun imgRun = imgParagraph.createRun();
                        imgRun.addBreak();
                        imgRun.addPicture(is, format, fileName, Units.toEMU(image.getWidth()), Units.toEMU(image.getHeight()));
                    }
                }
            }

        }
        //保存Word文档
        try (FileOutputStream out = new FileOutputStream(wordFilePath)) {
            wordDocument.write(out);
        }

        // 关闭PDF文档
        pdfDocument.close();

        System.out.println("PDF内容已成功写入Word文件!");

    } catch (Exception e) {
        e.printStackTrace();
    }