Closed Zuhaib121 closed 7 months ago
A simple Java application (or a series of Java classes) that creates a PDF, reads text from a PDF, extracts words from the PDF, and even fetches the coordinates of each word in the document.
This application uses the open-source Apache PDFBox library, which allows the creation of new PDF documents, the manipulation of existing documents, and the extraction of content from documents.
Reference link: https://pdfbox.apache.org/index.html
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 https://maven.apache.org/xsd/maven-4.0.0.xsd">
<!-- Maven build descriptor excerpt for the PDF Generator project. -->
<!-- NOTE(review): no <modelVersion> element or closing </project> tag is visible in this excerpt; confirm against the full pom.xml. -->
<!-- Parent POM: this project inherits from spring-boot-starter-parent 2.3.4.RELEASE. -->
<parent>
<groupId>org.springframework.boot</groupId>
<artifactId>spring-boot-starter-parent</artifactId>
<version>2.3.4.RELEASE</version>
<!-- Empty relativePath: per the Maven POM reference, the parent is looked up in repositories rather than at a local path. -->
<relativePath/>
</parent>
<!-- Coordinates and descriptive metadata of this project. -->
<groupId>com.viveknaskar.pdfgenerator</groupId>
<artifactId>pdf-generator</artifactId>
<version>0.0.1-SNAPSHOT</version>
<name>PDF Generator</name>
<description>Java Application to Create PDF using Apache PDFBox</description>
<dependencies>
<!-- Apache PDFBox 2.0.22: the only dependency declared in this excerpt. -->
<dependency>
<groupId>org.apache.pdfbox</groupId>
<artifactId>pdfbox</artifactId>
<version>2.0.22</version>
</dependency>
</dependencies>
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 https://maven.apache.org/xsd/maven-4.0.0.xsd">
<!-- Maven build descriptor excerpt for the PDF Generator project. -->
<!-- NOTE(review): no <modelVersion> element or closing </project> tag is visible in this excerpt; confirm against the full pom.xml. -->
<!-- Parent POM: this project inherits from spring-boot-starter-parent 2.3.4.RELEASE. -->
<parent>
<groupId>org.springframework.boot</groupId>
<artifactId>spring-boot-starter-parent</artifactId>
<version>2.3.4.RELEASE</version>
<!-- Empty relativePath: per the Maven POM reference, the parent is looked up in repositories rather than at a local path. -->
<relativePath/>
</parent>
<!-- Coordinates and descriptive metadata of this project. -->
<groupId>com.viveknaskar.pdfgenerator</groupId>
<artifactId>pdf-generator</artifactId>
<version>0.0.1-SNAPSHOT</version>
<name>PDF Generator</name>
<description>Java Application to Create PDF using Apache PDFBox</description>
<dependencies>
<!-- Apache PDFBox 2.0.22: the only dependency declared in this excerpt. -->
<dependency>
<groupId>org.apache.pdfbox</groupId>
<artifactId>pdfbox</artifactId>
<version>2.0.22</version>
</dependency>
</dependencies>
package com.viveknaskar.pdfgenerator;
import org.apache.pdfbox.pdmodel.PDDocument; import org.apache.pdfbox.pdmodel.PDPage; import org.apache.pdfbox.pdmodel.PDPageContentStream; import org.apache.pdfbox.pdmodel.font.PDFont; import org.apache.pdfbox.pdmodel.font.PDType1Font;
import java.io.IOException;
/**
 * Class stub with an empty body in this excerpt.
 *
 * <p>NOTE(review): presumably the PDF-creation example (inferred from the class name and the
 * PDFBox document/page/font imports preceding it) — confirm against the full source file.
 */
public class GeneratePDF {
} package com.viveknaskar.pdfgenerator;
import org.apache.pdfbox.pdmodel.PDDocument; import org.apache.pdfbox.pdmodel.PDPage; import org.apache.pdfbox.pdmodel.PDPageContentStream; import org.apache.pdfbox.pdmodel.font.PDFont; import org.apache.pdfbox.pdmodel.font.PDType1Font;
import java.io.IOException;
/**
 * Class stub with an empty body in this excerpt.
 *
 * <p>NOTE(review): presumably the PDF-creation example (inferred from the class name and the
 * PDFBox document/page/font imports preceding it) — confirm against the full source file.
 */
public class GeneratePDF {
} package com.viveknaskar.pdfgenerator;
import org.apache.pdfbox.pdmodel.PDDocument; import org.apache.pdfbox.text.PDFTextStripper; import org.apache.pdfbox.text.TextPosition;
import java.io.*; import java.util.ArrayList; import java.util.Collections; import java.util.List;
/**
 * Subclass of PDFBox's {@code PDFTextStripper}; the body is empty in this excerpt.
 *
 * <p>NOTE(review): presumably the word/coordinate extraction example (inferred from the class
 * name and the {@code TextPosition} import preceding it) — confirm against the full source file.
 */
public class GetWordsFromPDF extends PDFTextStripper {
} package com.viveknaskar.pdfgenerator;
import org.apache.pdfbox.pdmodel.PDDocument; import org.apache.pdfbox.text.PDFTextStripper; import org.apache.pdfbox.text.TextPosition;
import java.io.*; import java.util.ArrayList; import java.util.Collections; import java.util.List;
/**
 * Subclass of PDFBox's {@code PDFTextStripper}; the body is empty in this excerpt.
 *
 * <p>NOTE(review): presumably the word/coordinate extraction example (inferred from the class
 * name and the {@code TextPosition} import preceding it) — confirm against the full source file.
 */
public class GetWordsFromPDF extends PDFTextStripper {
} package com.viveknaskar.pdfgenerator;
import org.apache.pdfbox.pdmodel.PDDocument; import org.apache.pdfbox.text.PDFTextStripper;
import java.io.File; import java.io.IOException;
/**
 * Class stub with an empty body in this excerpt.
 *
 * <p>NOTE(review): presumably the text-reading example (inferred from the class name and the
 * {@code PDDocument}/{@code PDFTextStripper} imports preceding it) — confirm against the full
 * source file.
 */
public class ReadFromPDF {
}