yasserg / crawler4j

Open Source Web Crawler for Java
Apache License 2.0
4.56k stars 1.93k forks source link

Incompatible JE Version in Maven Dependency #477

Open afan0918 opened 1 month ago

afan0918 commented 1 month ago

Problem Description:

The <je.version> property in this project's pom.xml is currently set to 5.0.84. However, version 5.0.84 of the JE artifact has been removed and is no longer available in the Maven repository; the highest available version is now 5.0.73. As a result, Maven builds fail because the specified version cannot be resolved.

Steps to Reproduce:

  1. Clone the repository.
  2. Run Maven build.
  3. The build fails due to the missing dependency: <je.version>5.0.84</je.version>.

Expected Behavior:

The build should complete successfully with a valid version of the JE dependency available in the Maven repository.

Suggested Fix:

Update the pom.xml to use the latest available JE version, which is 5.0.73, like this:

<je.version>5.0.73</je.version>

This allows the project to build without manual intervention.

Additional Information:

Manually adjusting the dependency resolves the issue and allows the project to run as expected.

afan0918 commented 1 month ago

This pom.xml can be used.

<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
    <!-- POM model version understood by Maven. -->
    <modelVersion>4.0.0</modelVersion>

    <!-- Parent POM, located one directory up. This module declares no
         groupId or version of its own, so both are inherited from here. -->
    <parent>
        <groupId>edu.uci.ics</groupId>
        <artifactId>crawler4j-parent</artifactId>
        <version>4.4.0</version>
        <relativePath>../pom.xml</relativePath>
    </parent>

    <!-- Coordinates and descriptive metadata of this module. -->
    <artifactId>crawler4j</artifactId>
    <name>${project.groupId}:${project.artifactId}</name>
    <description>Open Source Web Crawler for Java</description>
    <url>https://github.com/yasserg/crawler4j</url>

    <!-- Version numbers referenced by the <dependencies> section. -->
    <properties>
        <!-- Logging -->
        <slf4j.version>1.7.22</slf4j.version>
        <logback.version>1.1.7</logback.version>

        <!-- Core libraries -->
        <guava.version>24.0-jre</guava.version>
        <apache.http.components.version>4.5.3</apache.http.components.version>
        <!-- Version of com.sleepycat:je; 5.0.73 replaces 5.0.84, which is
             reported as no longer resolvable from the repository. -->
        <je.version>5.0.73</je.version>
        <apache.tika.version>1.16</apache.tika.version>

        <!-- Test-only libraries -->
        <junit.version>4.12</junit.version>
        <wiremock.version>2.14.0</wiremock.version>
        <spock.version>1.0-groovy-2.4</spock.version>
        <groovy.version>2.4.12</groovy.version>
    </properties>

    <profiles>
        <!-- Optional profile "fatjar": in addition to the regular build,
             runs the assembly plugin's "single" goal during the package
             phase with the predefined jar-with-dependencies descriptor. -->
        <profile>
            <id>fatjar</id>
            <build>
                <plugins>
                    <plugin>
                        <!-- groupId stated explicitly, matching the other
                             org.apache.maven.plugins entries in this POM;
                             it is also the value Maven assumes when omitted. -->
                        <groupId>org.apache.maven.plugins</groupId>
                        <artifactId>maven-assembly-plugin</artifactId>
                        <version>2.5.3</version>
                        <configuration>
                            <descriptorRefs>
                                <descriptorRef>jar-with-dependencies</descriptorRef>
                            </descriptorRefs>
                        </configuration>
                        <executions>
                            <execution>
                                <id>make-fat-jar</id>
                                <phase>package</phase>
                                <goals>
                                    <goal>single</goal>
                                </goals>
                                <configuration>
                                    <!-- Base name of the produced archive. -->
                                    <finalName>crawler4j-${project.version}</finalName>
                                </configuration>
                            </execution>
                        </executions>
                    </plugin>
                </plugins>
            </build>
        </profile>
    </profiles>

    <!-- Default build: Java compilation, jar packaging, checkstyle, JaCoCo
         coverage, and Groovy compilation for sources and tests.
         The order of the plugin declarations is left as-is because it can
         influence execution order within a lifecycle phase. -->
    <build>
        <plugins>
            <!-- No version or configuration is given here; presumably both are
                 managed by the parent POM (TODO confirm). -->
            <plugin>
                <groupId>org.apache.maven.plugins</groupId>
                <artifactId>maven-compiler-plugin</artifactId>
            </plugin>
            <!-- Packages the jar, leaving out every *.properties file. -->
            <plugin>
                <groupId>org.apache.maven.plugins</groupId>
                <artifactId>maven-jar-plugin</artifactId>
                <version>2.5</version>
                <configuration>
                    <excludes>
                        <exclude>**/*.properties</exclude>
                    </excludes>
                </configuration>
            </plugin>
            <!-- No version or configuration is given here; presumably both are
                 managed by the parent POM (TODO confirm). -->
            <plugin>
                <groupId>org.apache.maven.plugins</groupId>
                <artifactId>maven-checkstyle-plugin</artifactId>
            </plugin>
            <!-- Code coverage: the agent is prepared in the plugin's default
                 phase (no phase is set), and the report is generated in the
                 test phase. -->
            <plugin>
                <groupId>org.jacoco</groupId>
                <artifactId>jacoco-maven-plugin</artifactId>
                <version>0.7.9</version>
                <executions>
                    <execution>
                        <id>pre-unit-test</id>
                        <goals>
                            <goal>prepare-agent</goal>
                        </goals>
                    </execution>
                    <execution>
                        <id>post-unit-test</id>
                        <phase>test</phase>
                        <goals>
                            <goal>report</goal>
                        </goals>
                        <configuration>
                            <!-- Anything under an "exceptions" directory is
                                 left out of the coverage report. -->
                            <excludes>
                                <exclude>**/exceptions/**</exclude>
                            </excludes>
                        </configuration>
                    </execution>
                </executions>
            </plugin>
            <!-- Groovy support: registers Groovy source roots, generates and
                 removes stubs, and compiles main and test Groovy code. No
                 phase is set, so each goal runs in its plugin-default phase. -->
            <plugin>
                <groupId>org.codehaus.gmavenplus</groupId>
                <artifactId>gmavenplus-plugin</artifactId>
                <version>1.5</version>
                <executions>
                    <execution>
                        <goals>
                            <goal>addSources</goal>
                            <goal>addTestSources</goal>
                            <goal>generateStubs</goal>
                            <goal>compile</goal>
                            <goal>testGenerateStubs</goal>
                            <goal>testCompile</goal>
                            <goal>removeStubs</goal>
                            <goal>removeTestStubs</goal>
                        </goals>
                    </execution>
                </executions>
            </plugin>
            <!-- Registers src/main/groovy and src/test/groovy as additional
                 source and test-source directories. -->
            <plugin>
                <groupId>org.codehaus.mojo</groupId>
                <artifactId>build-helper-maven-plugin</artifactId>
                <version>1.9.1</version>
                <executions>
                    <execution>
                        <id>add-source</id>
                        <phase>generate-sources</phase>
                        <goals>
                            <goal>add-source</goal>
                        </goals>
                        <configuration>
                            <sources>
                                <source>src/main/groovy</source>
                            </sources>
                        </configuration>
                    </execution>
                    <execution>
                        <id>add-test-source</id>
                        <phase>generate-test-sources</phase>
                        <goals>
                            <goal>add-test-source</goal>
                        </goals>
                        <configuration>
                            <sources>
                                <source>src/test/groovy</source>
                            </sources>
                        </configuration>
                    </execution>
                </executions>
            </plugin>
        </plugins>
    </build>

    <!-- Libraries this module depends on. All versions come from the
         <properties> section of this POM. -->
    <dependencies>

        <!-- Compile time Dependencies -->

        <dependency>
            <!-- Logging framework -->
            <groupId>org.slf4j</groupId>
            <artifactId>slf4j-api</artifactId>
            <version>${slf4j.version}</version>
        </dependency>
        <dependency>
            <!-- Implementation of slf4j; runtime scope only -->
            <groupId>ch.qos.logback</groupId>
            <artifactId>logback-classic</artifactId>
            <version>${logback.version}</version>
            <scope>runtime</scope>
        </dependency>
        <dependency>
            <!-- Google's core Java libraries -->
            <groupId>com.google.guava</groupId>
            <artifactId>guava</artifactId>
            <version>${guava.version}</version>
        </dependency>
        <dependency>
            <groupId>org.apache.httpcomponents</groupId>
            <artifactId>httpclient</artifactId>
            <version>${apache.http.components.version}</version>
            <scope>compile</scope>
        </dependency>

        <dependency>
            <!-- Presumably served by the oracleReleases repository declared
                 in this POM (TODO confirm). -->
            <groupId>com.sleepycat</groupId>
            <artifactId>je</artifactId>
            <version>${je.version}</version>
        </dependency>

        <dependency>
            <groupId>org.apache.tika</groupId>
            <artifactId>tika-parsers</artifactId>
            <version>${apache.tika.version}</version>
            <!-- Transitive libraries of tika-parsers that are kept off the
                 classpath. Each groupId:artifactId pair is listed once. -->
            <exclusions>
                <exclusion>
                    <groupId>org.apache.poi</groupId>
                    <artifactId>poi</artifactId>
                </exclusion>
                <exclusion>
                    <groupId>org.apache.poi</groupId>
                    <artifactId>poi-ooxml</artifactId>
                </exclusion>
                <exclusion>
                    <groupId>org.apache.poi</groupId>
                    <artifactId>poi-scratchpad</artifactId>
                </exclusion>
                <exclusion>
                    <groupId>org.apache.poi</groupId>
                    <artifactId>poi-ooxml-schemas</artifactId>
                </exclusion>
                <exclusion>
                    <groupId>org.apache.pdfbox</groupId>
                    <artifactId>pdfbox</artifactId>
                </exclusion>
                <exclusion>
                    <groupId>org.apache.pdfbox</groupId>
                    <artifactId>pdfbox-tools</artifactId>
                </exclusion>
                <exclusion>
                    <groupId>org.apache.pdfbox</groupId>
                    <artifactId>jempbox</artifactId>
                </exclusion>
                <exclusion>
                    <groupId>org.tallison</groupId>
                    <artifactId>jmatio</artifactId>
                </exclusion>
                <exclusion>
                    <groupId>com.healthmarketscience.jackcess</groupId>
                    <artifactId>jackcess</artifactId>
                </exclusion>
                <exclusion>
                    <groupId>com.healthmarketscience.jackcess</groupId>
                    <artifactId>jackcess-encrypt</artifactId>
                </exclusion>
                <exclusion>
                    <groupId>org.tukaani</groupId>
                    <artifactId>xz</artifactId>
                </exclusion>
                <exclusion>
                    <groupId>com.github.junrar</groupId>
                    <artifactId>junrar</artifactId>
                </exclusion>
                <exclusion>
                    <groupId>org.apache.opennlp</groupId>
                    <artifactId>opennlp-tools</artifactId>
                </exclusion>
                <exclusion>
                    <groupId>org.apache.sis.core</groupId>
                    <artifactId>sis-utility</artifactId>
                </exclusion>
                <exclusion>
                    <groupId>org.apache.sis.storage</groupId>
                    <artifactId>sis-netcdf</artifactId>
                </exclusion>
                <exclusion>
                    <groupId>org.apache.sis.core</groupId>
                    <artifactId>sis-metadata</artifactId>
                </exclusion>
                <exclusion>
                    <groupId>org.opengis</groupId>
                    <artifactId>geoapi</artifactId>
                </exclusion>
                <exclusion>
                    <groupId>com.pff</groupId>
                    <artifactId>java-libpst</artifactId>
                </exclusion>
                <exclusion>
                    <groupId>com.rometools</groupId>
                    <artifactId>rome</artifactId>
                </exclusion>
                <exclusion>
                    <groupId>org.json</groupId>
                    <artifactId>json</artifactId>
                </exclusion>
                <exclusion>
                    <groupId>edu.ucar</groupId>
                    <artifactId>netcdf4</artifactId>
                </exclusion>
                <exclusion>
                    <groupId>edu.ucar</groupId>
                    <artifactId>grib</artifactId>
                </exclusion>
                <exclusion>
                    <groupId>edu.ucar</groupId>
                    <artifactId>cdm</artifactId>
                </exclusion>
                <exclusion>
                    <groupId>edu.ucar</groupId>
                    <artifactId>httpservices</artifactId>
                </exclusion>
                <exclusion>
                    <groupId>org.gagravarr</groupId>
                    <artifactId>vorbis-java-tika</artifactId>
                </exclusion>
                <exclusion>
                    <groupId>org.gagravarr</groupId>
                    <artifactId>vorbis-java-core</artifactId>
                </exclusion>
                <exclusion>
                    <groupId>org.ow2.asm</groupId>
                    <artifactId>asm</artifactId>
                </exclusion>
                <exclusion>
                    <groupId>com.googlecode.mp4parser</groupId>
                    <artifactId>isoparser</artifactId>
                </exclusion>
                <exclusion>
                    <groupId>com.drewnoakes</groupId>
                    <artifactId>metadata-extractor</artifactId>
                </exclusion>
                <exclusion>
                    <groupId>org.apache.commons</groupId>
                    <artifactId>commons-csv</artifactId>
                </exclusion>
                <exclusion>
                    <groupId>org.apache.commons</groupId>
                    <artifactId>commons-exec</artifactId>
                </exclusion>
                <exclusion>
                    <groupId>org.apache.commons</groupId>
                    <artifactId>commons-compress</artifactId>
                </exclusion>
                <exclusion>
                    <groupId>commons-codec</groupId>
                    <artifactId>commons-codec</artifactId>
                </exclusion>
                <exclusion>
                    <groupId>com.googlecode.json-simple</groupId>
                    <artifactId>json-simple</artifactId>
                </exclusion>
                <exclusion>
                    <groupId>com.google.code.gson</groupId>
                    <artifactId>gson</artifactId>
                </exclusion>
                <exclusion>
                    <groupId>de.l3s.boilerpipe</groupId>
                    <artifactId>boilerpipe</artifactId>
                </exclusion>
                <exclusion>
                    <groupId>com.googlecode.juniversalchardet</groupId>
                    <artifactId>juniversalchardet</artifactId>
                </exclusion>
                <exclusion>
                    <groupId>org.codelibs</groupId>
                    <artifactId>jhighlight</artifactId>
                </exclusion>
                <exclusion>
                    <groupId>org.bouncycastle</groupId>
                    <artifactId>bcmail-jdk15on</artifactId>
                </exclusion>
                <exclusion>
                    <groupId>org.bouncycastle</groupId>
                    <artifactId>bcprov-jdk15on</artifactId>
                </exclusion>
                <exclusion>
                    <groupId>org.apache.cxf</groupId>
                    <artifactId>cxf-rt-rs-client</artifactId>
                </exclusion>
                <exclusion>
                    <groupId>com.fasterxml.jackson.core</groupId>
                    <artifactId>jackson-core</artifactId>
                </exclusion>
                <exclusion>
                    <groupId>com.tdunning</groupId>
                    <artifactId>json</artifactId>
                </exclusion>
                <exclusion>
                    <groupId>edu.usc.ir</groupId>
                    <artifactId>sentiment-analysis-parser</artifactId>
                </exclusion>
            </exclusions>
        </dependency>

        <!-- Test Dependencies -->
        <dependency>
            <groupId>junit</groupId>
            <artifactId>junit</artifactId>
            <version>${junit.version}</version>
            <scope>test</scope>
        </dependency>
        <dependency>
            <groupId>com.github.tomakehurst</groupId>
            <artifactId>wiremock</artifactId>
            <version>${wiremock.version}</version>
            <scope>test</scope>
        </dependency>
        <dependency>
            <groupId>org.codehaus.groovy</groupId>
            <artifactId>groovy-all</artifactId>
            <version>${groovy.version}</version>
            <scope>test</scope>
        </dependency>
        <dependency>
            <groupId>org.spockframework</groupId>
            <artifactId>spock-core</artifactId>
            <version>${spock.version}</version>
            <scope>test</scope>
        </dependency>
    </dependencies>

    <!-- Additional artifact repository, consulted alongside Maven's defaults. -->
    <repositories>
        <repository>
            <id>oracleReleases</id>
            <name>Oracle Released Java Packages</name>
            <!-- HTTPS instead of plain HTTP: Maven 3.8.1 and later block
                 external http:// repositories by default, which makes
                 artifacts hosted only here unresolvable. -->
            <url>https://download.oracle.com/maven</url>
            <layout>default</layout>
        </repository>
    </repositories>

</project>
TolstoyDotCom commented 1 month ago

That POM and the Gradle file didn't work due to missing dependencies. I didn't want to use code4craft/webmagic, but I switched to it because it works.