Closed Aload closed 2 years ago
Can you provide context and example to reproduce the issue?
Can you provide context and example to reproduce the issue?
OK
env: Spark:3.1.2 clickhouse:22.2.2 revision 54455 hadoop :CDH6.3.2-3.0.0
pom.xml
`
<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0"
         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">

    <groupId>org.example</groupId>
    <artifactId>TestClickhouse</artifactId>
    <version>1.0-SNAPSHOT</version>

    <properties>
        <maven.compiler.source>8</maven.compiler.source>
        <maven.compiler.target>8</maven.compiler.target>
        <scala.version>2.12.10</scala.version>
        <spark.version>3.1.2</spark.version>
        <hadoop.version>2.7.3</hadoop.version>
        <scala.binary.version>2.12</scala.binary.version>
        <!-- FIX: this property was referenced by the shade plugin's <outputFile>
             but never defined, so the build could not resolve ${anso.jar}. -->
        <anso.jar>${project.build.directory}/${project.artifactId}-${project.version}.jar</anso.jar>
    </properties>

    <dependencies>
        <!-- spark-start -->
        <dependency>
            <groupId>org.apache.spark</groupId>
            <artifactId>spark-core_${scala.binary.version}</artifactId>
            <version>${spark.version}</version>
        </dependency>
        <dependency>
            <groupId>org.apache.spark</groupId>
            <artifactId>spark-sql_${scala.binary.version}</artifactId>
            <version>${spark.version}</version>
        </dependency>
        <!-- clickhouse -->
        <dependency>
            <!-- will stop using ru.yandex.clickhouse starting from 0.4.0 -->
            <groupId>com.clickhouse</groupId>
            <artifactId>clickhouse-jdbc</artifactId>
            <version>0.3.2-patch8</version>
            <!-- the "http" classifier is the shaded jar bundling the HTTP client;
                 transitive deps are excluded because they are already inside it -->
            <classifier>http</classifier>
            <exclusions>
                <exclusion>
                    <groupId>*</groupId>
                    <artifactId>*</artifactId>
                </exclusion>
            </exclusions>
        </dependency>
        <dependency>
            <groupId>org.apache.hadoop</groupId>
            <artifactId>hadoop-client</artifactId>
            <version>${hadoop.version}</version>
            <exclusions>
                <exclusion>
                    <groupId>org.slf4j</groupId>
                    <artifactId>*</artifactId>
                </exclusion>
                <exclusion>
                    <groupId>log4j</groupId>
                    <artifactId>*</artifactId>
                </exclusion>
            </exclusions>
        </dependency>
    </dependencies>

    <build>
        <plugins>
            <plugin>
                <groupId>org.apache.maven.plugins</groupId>
                <artifactId>maven-deploy-plugin</artifactId>
                <version>2.8.2</version>
                <configuration>
                    <skip>true</skip>
                </configuration>
            </plugin>
            <plugin>
                <groupId>org.apache.maven.plugins</groupId>
                <artifactId>maven-install-plugin</artifactId>
                <version>2.5.2</version>
                <configuration>
                    <skip>true</skip>
                </configuration>
            </plugin>
            <plugin>
                <groupId>org.codehaus.mojo</groupId>
                <artifactId>versions-maven-plugin</artifactId>
                <version>2.8.1</version>
                <configuration>
                    <generateBackupPoms>false</generateBackupPoms>
                </configuration>
            </plugin>
            <plugin>
                <groupId>org.apache.maven.plugins</groupId>
                <artifactId>maven-shade-plugin</artifactId>
                <version>3.2.4</version>
                <configuration>
                    <shadedArtifactAttached>false</shadedArtifactAttached>
                    <promoteTransitiveDependencies>true</promoteTransitiveDependencies>
                    <outputFile>${anso.jar}</outputFile>
                    <artifactSet>
                        <includes>
                            <include>*:*</include>
                        </includes>
                        <!-- everything listed here is provided by the cluster at runtime -->
                        <excludes>
                            <exclude>org.apache.hadoop:*</exclude>
                            <exclude>org.apache.spark:*</exclude>
                            <exclude>org.apache.hive:*</exclude>
                            <exclude>org.apache.hive.shims:*</exclude>
                            <exclude>org.apache.parquet:*</exclude>
                            <exclude>com.fasterxml.jackson:*</exclude>
                            <exclude>org.apache.commons:*</exclude>
                            <exclude>org.slf4j:*</exclude>
                            <exclude>log4j:log4j</exclude>
                            <exclude>commons-cli:commons-cli</exclude>
                            <exclude>commons-lang:commons-lang</exclude>
                            <exclude>org.apache.avro:*</exclude>
                            <exclude>org.apache.orc:*</exclude>
                            <exclude>org.glassfish:*</exclude>
                            <exclude>org.apache.zookeeper:*</exclude>
                            <exclude>com.google.guava:guava</exclude>
                            <exclude>com.google.protobuf:*</exclude>
                            <exclude>org.scala-lang:*</exclude>
                            <exclude>io.netty:*</exclude>
                        </excludes>
                    </artifactSet>
                    <filters>
                        <filter>
                            <artifact>*:*</artifact>
                            <excludes>
                                <exclude>org/datanucleus/**</exclude>
                                <!-- drop jar signatures; stale ones break the shaded jar -->
                                <exclude>META-INF/*.SF</exclude>
                                <exclude>META-INF/*.DSA</exclude>
                                <exclude>META-INF/*.RSA</exclude>
                                <exclude>META-INF/vfs-providers.xml</exclude>
                                <exclude>io/netty/**</exclude>
                            </excludes>
                        </filter>
                    </filters>
                </configuration>
                <executions>
                    <execution>
                        <phase>package</phase>
                        <goals>
                            <goal>shade</goal>
                        </goals>
                        <configuration>
                            <transformers>
                                <transformer
                                        implementation="org.apache.maven.plugins.shade.resource.ManifestResourceTransformer">
                                    <mainClass>com.anso.tess.Test1</mainClass>
                                </transformer>
                                <!-- keep environment-specific config out of the jar;
                                     the cluster's own copies are used instead -->
                                <transformer
                                        implementation="org.apache.maven.plugins.shade.resource.DontIncludeResourceTransformer">
                                    <resources>
                                        <resource>log4j.properties</resource>
                                        <resource>hdfs-site.xml</resource>
                                        <resource>hive-site.xml</resource>
                                        <resource>core-site.xml</resource>
                                    </resources>
                                </transformer>
                            </transformers>
                        </configuration>
                    </execution>
                </executions>
            </plugin>
        </plugins>
    </build>
    <!-- FIX: the closing tag was missing from the original paste -->
</project>
`
Code to reproduce:
`
package com.anso.tess

import org.apache.spark.sql.{DataFrame, Row, SaveMode, SparkSession}

/**
 * Minimal reproduction: builds a small in-memory DataFrame and writes it to a
 * ClickHouse table, either through Spark's JDBC writer ([[writeClickhouse]])
 * or through a raw prepared-statement batch ([[writeJDBCWrite]]).
 */
object Test1 {

  /**
   * Writes `dataFrame` to ClickHouse via Spark's JDBC data source.
   *
   * @param dataFrame rows to append
   * @param dbtable   fully-qualified target table, e.g. "dev.test1"
   */
  private def writeClickhouse(dataFrame: DataFrame, dbtable: String): Unit = {
    val prop = new java.util.Properties
    prop.setProperty("user", "default")
    prop.setProperty("password", "anso2022")
    prop.setProperty("driver", "com.clickhouse.jdbc.ClickHouseDriver")
    val url: String = "jdbc:clickhouse://dev32:8123"
    dataFrame
      .write
      .mode(SaveMode.Append)
      .option("batchsize", "10000")
      // ClickHouse does not support JDBC transactions
      .option("isolationLevel", "NONE")
      .option("numPartitions", "1")
      .jdbc(url, dbtable, prop)
  }

  /**
   * Writes `dataFrame` to ClickHouse through a manually batched
   * PreparedStatement. Collects to the driver — only for small test data.
   *
   * @param dataFrame rows (id: Int, name: String, num: Int) to insert
   * @param dbtable   fully-qualified target table
   */
  def writeJDBCWrite(dataFrame: DataFrame, dbtable: String): Unit = {
    val conn = ClickhouseUtils.getConn("dev32", 8123, "dev", "test1", "default", "anso2022")
    val sql: String =
      s"""
         |INSERT INTO $dbtable VALUES (?,?,?);
         |""".stripMargin
    val state = conn.prepareStatement(sql)
    try {
      dataFrame.collectAsList().forEach { (t: Row) =>
        // FIX: Row has no getAsInt/getAsString; use positional accessors
        state.setInt(1, t.getInt(0))
        state.setString(2, t.getString(1))
        state.setInt(3, t.getInt(2))
        state.addBatch()
      }
      state.executeBatch()
    } finally {
      // FIX: statement and connection were leaked in the original
      state.close()
      conn.close()
    }
  }

  // Explicit main instead of `extends App` (avoids delayed-init pitfalls);
  // the pom's <mainClass>com.anso.tess.Test1</mainClass> still resolves.
  def main(args: Array[String]): Unit = {
    val session: SparkSession = SparkSession.builder()
      // .master("local[*]")
      .appName(this.getClass.getName)
      .getOrCreate()
    session.sparkContext.setLogLevel("ERROR")

    import session.implicits._

    // FIX: removed the trailing comma after the last tuple — a syntax error in Scala 2.12
    val sourceDf = session.sparkContext.parallelize(Seq(
      (1, "Andy", 10), (2, "Andy1", 10), (3, "Andy2", 10), (4, "Andy3", 10),
      (5, "Andy3", 10), (6, "Andy3", 10), (7, "Andy3", 10), (8, "Andy3", 10),
      (9, "Andy3", 10), (10, "Andy3", 10), (11, "Andy3", 10), (12, "Andy3", 10),
      (13, "Andy3", 10), (14, "Andy3", 10), (15, "Andy3", 10), (16, "Andy3", 10),
      (17, "Andy3", 10), (18, "Andy3", 10), (19, "Andy3", 10), (20, "Andy3", 10),
      (21, "Andy3", 10)
    )).toDF("id", "name", "num")

    writeClickhouse(sourceDf, "dev.test1")
    // writeJDBCWrite(sourceDf, "dev.test1")
  }
}
`
Can you provide context and example to reproduce the issue?
We can communicate over WeChat if that is convenient for you.
Can you provide context and example to reproduce the issue?
Thanks very much for your reply. After further investigation, the problem has been resolved: when running on Spark 3.1.x, the configuration parameter
`spark.sql.defaultUrlStreamHandlerFactory.enabled=false` must be set to avoid this problem.
version:spark3.1.2
Caused by: java.lang.ClassCastException: org.apache.hadoop.fs.FsUrlConnection cannot be cast to java.net.HttpURLConnection at com.clickhouse.client.http.HttpUrlConnectionImpl.newConnection(HttpUrlConnectionImpl.java:86) at com.clickhouse.client.http.HttpUrlConnectionImpl.&lt;init&gt;(HttpUrlConnectionImpl.java:163)
at com.clickhouse.client.http.ClickHouseHttpConnectionFactory.createConnection(ClickHouseHttpConnectionFactory.java:12)
at com.clickhouse.client.http.ClickHouseHttpClient.newConnection(ClickHouseHttpClient.java:41)
at com.clickhouse.client.http.ClickHouseHttpClient.newConnection(ClickHouseHttpClient.java:23)
at com.clickhouse.client.AbstractClient.getConnection(AbstractClient.java:143)
at com.clickhouse.client.http.ClickHouseHttpClient.postRequest(ClickHouseHttpClient.java:74)
at com.clickhouse.client.http.ClickHouseHttpClient.execute(ClickHouseHttpClient.java:114)
at com.clickhouse.client.ClickHouseRequest.execute(ClickHouseRequest.java:1422)
at com.clickhouse.jdbc.internal.ClickHouseConnectionImpl.getServerInfo(ClickHouseConnectionImpl.java:77)