Open afan0918 opened 1 month ago
This pom.xml can be used.
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<modelVersion>4.0.0</modelVersion>

<!-- Parent POM, resolved from the directory above this module. -->
<parent>
  <groupId>edu.uci.ics</groupId>
  <artifactId>crawler4j-parent</artifactId>
  <version>4.4.0</version>
  <relativePath>../pom.xml</relativePath>
</parent>

<!-- No groupId or version is declared here; both come from the parent above. -->
<artifactId>crawler4j</artifactId>
<name>${project.groupId}:${project.artifactId}</name>
<description>Open Source Web Crawler for Java</description>
<url>https://github.com/yasserg/crawler4j</url>
<!-- Version numbers referenced by the dependency declarations of this POM. -->
<properties>
  <!-- Compile and runtime dependency versions -->
  <slf4j.version>1.7.22</slf4j.version>
  <logback.version>1.1.7</logback.version>
  <guava.version>24.0-jre</guava.version>
  <apache.http.components.version>4.5.3</apache.http.components.version>
  <!-- Berkeley DB Java Edition (com.sleepycat:je). -->
  <je.version>5.0.73</je.version>
  <apache.tika.version>1.16</apache.tika.version>

  <!-- Test dependency versions -->
  <junit.version>4.12</junit.version>
  <wiremock.version>2.14.0</wiremock.version>
  <spock.version>1.0-groovy-2.4</spock.version>
  <groovy.version>2.4.12</groovy.version>
</properties>
<profiles>
  <!--
    Opt-in profile "fatjar": during the package phase, runs the assembly
    plugin's "single" goal with the predefined jar-with-dependencies descriptor.
  -->
  <profile>
    <id>fatjar</id>
    <build>
      <plugins>
        <plugin>
          <artifactId>maven-assembly-plugin</artifactId>
          <version>2.5.3</version>
          <configuration>
            <descriptorRefs>
              <descriptorRef>jar-with-dependencies</descriptorRef>
            </descriptorRefs>
          </configuration>
          <executions>
            <execution>
              <id>make-fat-jar</id>
              <phase>package</phase>
              <goals>
                <goal>single</goal>
              </goals>
              <configuration>
                <!-- Base name used for the assembled archive. -->
                <finalName>crawler4j-${project.version}</finalName>
              </configuration>
            </execution>
          </executions>
        </plugin>
      </plugins>
    </build>
  </profile>
</profiles>
<build>
  <plugins>
    <!-- No version or configuration here; presumably managed by the parent POM (TODO confirm). -->
    <plugin>
      <groupId>org.apache.maven.plugins</groupId>
      <artifactId>maven-compiler-plugin</artifactId>
    </plugin>

    <!-- Keep *.properties files out of the packaged jar. -->
    <plugin>
      <groupId>org.apache.maven.plugins</groupId>
      <artifactId>maven-jar-plugin</artifactId>
      <version>2.5</version>
      <configuration>
        <excludes>
          <exclude>**/*.properties</exclude>
        </excludes>
      </configuration>
    </plugin>

    <!-- No version or configuration here; presumably managed by the parent POM (TODO confirm). -->
    <plugin>
      <groupId>org.apache.maven.plugins</groupId>
      <artifactId>maven-checkstyle-plugin</artifactId>
    </plugin>

    <!-- Code coverage: attach the JaCoCo agent, then generate the report in the test phase. -->
    <plugin>
      <groupId>org.jacoco</groupId>
      <artifactId>jacoco-maven-plugin</artifactId>
      <version>0.7.9</version>
      <executions>
        <execution>
          <id>pre-unit-test</id>
          <goals>
            <goal>prepare-agent</goal>
          </goals>
        </execution>
        <execution>
          <id>post-unit-test</id>
          <phase>test</phase>
          <goals>
            <goal>report</goal>
          </goals>
          <configuration>
            <!-- Anything under an "exceptions" directory is left out of the report. -->
            <excludes>
              <exclude>**/exceptions/**</exclude>
            </excludes>
          </configuration>
        </execution>
      </executions>
    </plugin>

    <!-- Groovy compilation for main and test sources. -->
    <plugin>
      <groupId>org.codehaus.gmavenplus</groupId>
      <artifactId>gmavenplus-plugin</artifactId>
      <version>1.5</version>
      <executions>
        <execution>
          <goals>
            <goal>addSources</goal>
            <goal>addTestSources</goal>
            <goal>generateStubs</goal>
            <goal>compile</goal>
            <goal>testGenerateStubs</goal>
            <goal>testCompile</goal>
            <goal>removeStubs</goal>
            <goal>removeTestStubs</goal>
          </goals>
        </execution>
      </executions>
    </plugin>

    <!-- Register the Groovy directories as additional source roots. -->
    <plugin>
      <groupId>org.codehaus.mojo</groupId>
      <artifactId>build-helper-maven-plugin</artifactId>
      <version>1.9.1</version>
      <executions>
        <execution>
          <id>add-source</id>
          <phase>generate-sources</phase>
          <goals>
            <goal>add-source</goal>
          </goals>
          <configuration>
            <sources>
              <source>src/main/groovy</source>
            </sources>
          </configuration>
        </execution>
        <execution>
          <id>add-test-source</id>
          <phase>generate-test-sources</phase>
          <goals>
            <goal>add-test-source</goal>
          </goals>
          <configuration>
            <sources>
              <source>src/test/groovy</source>
            </sources>
          </configuration>
        </execution>
      </executions>
    </plugin>
  </plugins>
</build>
<dependencies>
<!-- Compile time Dependencies -->

<!-- Logging framework -->
<dependency>
  <groupId>org.slf4j</groupId>
  <artifactId>slf4j-api</artifactId>
  <version>${slf4j.version}</version>
</dependency>

<!-- Implementation of slf4j; runtime scope only -->
<dependency>
  <groupId>ch.qos.logback</groupId>
  <artifactId>logback-classic</artifactId>
  <version>${logback.version}</version>
  <scope>runtime</scope>
</dependency>

<!-- Google's core Java libraries -->
<dependency>
  <groupId>com.google.guava</groupId>
  <artifactId>guava</artifactId>
  <version>${guava.version}</version>
</dependency>

<!-- Apache HttpComponents HTTP client -->
<dependency>
  <groupId>org.apache.httpcomponents</groupId>
  <artifactId>httpclient</artifactId>
  <version>${apache.http.components.version}</version>
  <scope>compile</scope>
</dependency>

<!-- Berkeley DB Java Edition -->
<dependency>
  <groupId>com.sleepycat</groupId>
  <artifactId>je</artifactId>
  <version>${je.version}</version>
</dependency>
<!--
  Apache Tika parsers. The transitive dependencies listed below are excluded
  from this module's dependency tree. Each groupId:artifactId pair is listed
  exactly once.
-->
<dependency>
  <groupId>org.apache.tika</groupId>
  <artifactId>tika-parsers</artifactId>
  <version>${apache.tika.version}</version>
  <exclusions>
    <!-- org.apache.poi -->
    <exclusion>
      <groupId>org.apache.poi</groupId>
      <artifactId>poi</artifactId>
    </exclusion>
    <exclusion>
      <groupId>org.apache.poi</groupId>
      <artifactId>poi-ooxml</artifactId>
    </exclusion>
    <exclusion>
      <groupId>org.apache.poi</groupId>
      <artifactId>poi-scratchpad</artifactId>
    </exclusion>
    <exclusion>
      <groupId>org.apache.poi</groupId>
      <artifactId>poi-ooxml-schemas</artifactId>
    </exclusion>
    <!-- org.apache.pdfbox -->
    <exclusion>
      <groupId>org.apache.pdfbox</groupId>
      <artifactId>pdfbox</artifactId>
    </exclusion>
    <exclusion>
      <groupId>org.apache.pdfbox</groupId>
      <artifactId>pdfbox-tools</artifactId>
    </exclusion>
    <exclusion>
      <groupId>org.apache.pdfbox</groupId>
      <artifactId>jempbox</artifactId>
    </exclusion>
    <!-- Remaining exclusions, in their original order -->
    <exclusion>
      <groupId>org.tallison</groupId>
      <artifactId>jmatio</artifactId>
    </exclusion>
    <exclusion>
      <groupId>com.healthmarketscience.jackcess</groupId>
      <artifactId>jackcess</artifactId>
    </exclusion>
    <exclusion>
      <groupId>com.healthmarketscience.jackcess</groupId>
      <artifactId>jackcess-encrypt</artifactId>
    </exclusion>
    <exclusion>
      <groupId>org.tukaani</groupId>
      <artifactId>xz</artifactId>
    </exclusion>
    <exclusion>
      <groupId>com.github.junrar</groupId>
      <artifactId>junrar</artifactId>
    </exclusion>
    <exclusion>
      <groupId>org.apache.opennlp</groupId>
      <artifactId>opennlp-tools</artifactId>
    </exclusion>
    <exclusion>
      <groupId>org.apache.sis.core</groupId>
      <artifactId>sis-utility</artifactId>
    </exclusion>
    <exclusion>
      <groupId>org.apache.sis.storage</groupId>
      <artifactId>sis-netcdf</artifactId>
    </exclusion>
    <exclusion>
      <groupId>org.apache.sis.core</groupId>
      <artifactId>sis-metadata</artifactId>
    </exclusion>
    <exclusion>
      <groupId>org.opengis</groupId>
      <artifactId>geoapi</artifactId>
    </exclusion>
    <exclusion>
      <groupId>com.pff</groupId>
      <artifactId>java-libpst</artifactId>
    </exclusion>
    <exclusion>
      <groupId>com.rometools</groupId>
      <artifactId>rome</artifactId>
    </exclusion>
    <exclusion>
      <groupId>org.json</groupId>
      <artifactId>json</artifactId>
    </exclusion>
    <exclusion>
      <groupId>edu.ucar</groupId>
      <artifactId>netcdf4</artifactId>
    </exclusion>
    <exclusion>
      <groupId>edu.ucar</groupId>
      <artifactId>grib</artifactId>
    </exclusion>
    <exclusion>
      <groupId>edu.ucar</groupId>
      <artifactId>cdm</artifactId>
    </exclusion>
    <exclusion>
      <groupId>edu.ucar</groupId>
      <artifactId>httpservices</artifactId>
    </exclusion>
    <exclusion>
      <groupId>org.gagravarr</groupId>
      <artifactId>vorbis-java-tika</artifactId>
    </exclusion>
    <exclusion>
      <groupId>org.gagravarr</groupId>
      <artifactId>vorbis-java-core</artifactId>
    </exclusion>
    <exclusion>
      <groupId>org.ow2.asm</groupId>
      <artifactId>asm</artifactId>
    </exclusion>
    <exclusion>
      <groupId>com.googlecode.mp4parser</groupId>
      <artifactId>isoparser</artifactId>
    </exclusion>
    <exclusion>
      <groupId>com.drewnoakes</groupId>
      <artifactId>metadata-extractor</artifactId>
    </exclusion>
    <exclusion>
      <groupId>org.apache.commons</groupId>
      <artifactId>commons-csv</artifactId>
    </exclusion>
    <exclusion>
      <groupId>org.apache.commons</groupId>
      <artifactId>commons-exec</artifactId>
    </exclusion>
    <exclusion>
      <groupId>org.apache.commons</groupId>
      <artifactId>commons-compress</artifactId>
    </exclusion>
    <exclusion>
      <groupId>commons-codec</groupId>
      <artifactId>commons-codec</artifactId>
    </exclusion>
    <exclusion>
      <groupId>com.googlecode.json-simple</groupId>
      <artifactId>json-simple</artifactId>
    </exclusion>
    <exclusion>
      <groupId>com.google.code.gson</groupId>
      <artifactId>gson</artifactId>
    </exclusion>
    <exclusion>
      <groupId>de.l3s.boilerpipe</groupId>
      <artifactId>boilerpipe</artifactId>
    </exclusion>
    <exclusion>
      <groupId>com.googlecode.juniversalchardet</groupId>
      <artifactId>juniversalchardet</artifactId>
    </exclusion>
    <exclusion>
      <groupId>org.codelibs</groupId>
      <artifactId>jhighlight</artifactId>
    </exclusion>
    <exclusion>
      <groupId>org.bouncycastle</groupId>
      <artifactId>bcmail-jdk15on</artifactId>
    </exclusion>
    <exclusion>
      <groupId>org.bouncycastle</groupId>
      <artifactId>bcprov-jdk15on</artifactId>
    </exclusion>
    <exclusion>
      <groupId>org.apache.cxf</groupId>
      <artifactId>cxf-rt-rs-client</artifactId>
    </exclusion>
    <exclusion>
      <groupId>com.fasterxml.jackson.core</groupId>
      <artifactId>jackson-core</artifactId>
    </exclusion>
    <exclusion>
      <groupId>com.tdunning</groupId>
      <artifactId>json</artifactId>
    </exclusion>
    <exclusion>
      <groupId>edu.usc.ir</groupId>
      <artifactId>sentiment-analysis-parser</artifactId>
    </exclusion>
  </exclusions>
</dependency>
<!-- Test Dependencies (all declared with test scope) -->

<!-- JUnit -->
<dependency>
  <groupId>junit</groupId>
  <artifactId>junit</artifactId>
  <version>${junit.version}</version>
  <scope>test</scope>
</dependency>

<!-- WireMock -->
<dependency>
  <groupId>com.github.tomakehurst</groupId>
  <artifactId>wiremock</artifactId>
  <version>${wiremock.version}</version>
  <scope>test</scope>
</dependency>

<!-- Groovy -->
<dependency>
  <groupId>org.codehaus.groovy</groupId>
  <artifactId>groovy-all</artifactId>
  <version>${groovy.version}</version>
  <scope>test</scope>
</dependency>

<!-- Spock -->
<dependency>
  <groupId>org.spockframework</groupId>
  <artifactId>spock-core</artifactId>
  <version>${spock.version}</version>
  <scope>test</scope>
</dependency>
</dependencies>
<repositories>
  <!--
    Oracle's Maven repository.
    The URL uses HTTPS because Maven 3.8.1 and later block plain-HTTP
    repositories by default, which makes artifact resolution from an
    http:// URL fail.
  -->
  <repository>
    <id>oracleReleases</id>
    <name>Oracle Released Java Packages</name>
    <url>https://download.oracle.com/maven</url>
    <layout>default</layout>
  </repository>
</repositories>
</project>
That POM and the gradle file didn't work due to missing dependencies. I didn't want to use code4craft/webmagic but I switched to that because it works.
Problem Description:
The Maven dependency property `<je.version>` in this project is currently set to 5.0.84. However, version 5.0.84 has been removed and is no longer available in the Maven repository. The highest available version now is 5.0.73. This causes Maven builds to fail, since the specified version cannot be resolved.
Steps to Reproduce:
Build the project with Maven while the POM declares `<je.version>5.0.84</je.version>`.
Expected Behavior:
The build should complete successfully with a valid version of the JE dependency available in the Maven repository.
Suggested Fix:
Update the `pom.xml` to use the latest available JE version, which is 5.0.73, like this:
<je.version>5.0.73</je.version>
This allows the project to build without manual intervention.
Additional Information:
Manually adjusting the dependency resolves the issue and allows the project to run as expected.