Closed Aload closed 2 years ago
Can you provide context and example to reproduce the issue?
Can you provide context and example to reproduce the issue?
OK
env: Spark:3.1.2 clickhouse:22.2.2 revision 54455 hadoop :CDH6.3.2-3.0.0
pom.xml
`
<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0"
         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">

    <groupId>org.example</groupId>
    <artifactId>TestClickhouse</artifactId>
    <version>1.0-SNAPSHOT</version>

    <properties>
        <maven.compiler.source>8</maven.compiler.source>
        <maven.compiler.target>8</maven.compiler.target>
        <scala.version>2.12.10</scala.version>
        <spark.version>3.1.2</spark.version>
        <hadoop.version>2.7.3</hadoop.version>
        <scala.binary.version>2.12</scala.binary.version>
        <!-- FIX: this property was referenced by the shade plugin's <outputFile>
             but never defined, so the build could not resolve ${anso.jar}. -->
        <anso.jar>${project.build.directory}/${project.artifactId}-${project.version}.jar</anso.jar>
    </properties>

    <dependencies>
        <!-- spark-start -->
        <dependency>
            <groupId>org.apache.spark</groupId>
            <artifactId>spark-core_${scala.binary.version}</artifactId>
            <version>${spark.version}</version>
        </dependency>
        <dependency>
            <groupId>org.apache.spark</groupId>
            <artifactId>spark-sql_${scala.binary.version}</artifactId>
            <version>${spark.version}</version>
        </dependency>
        <!-- clickhouse -->
        <dependency>
            <!-- will stop using ru.yandex.clickhouse starting from 0.4.0 -->
            <groupId>com.clickhouse</groupId>
            <artifactId>clickhouse-jdbc</artifactId>
            <version>0.3.2-patch8</version>
            <!-- the "http" classifier is the shaded jar bundling the HTTP client;
                 transitive deps are excluded because they are already inside it -->
            <classifier>http</classifier>
            <exclusions>
                <exclusion>
                    <groupId>*</groupId>
                    <artifactId>*</artifactId>
                </exclusion>
            </exclusions>
        </dependency>
        <dependency>
            <groupId>org.apache.hadoop</groupId>
            <artifactId>hadoop-client</artifactId>
            <version>${hadoop.version}</version>
            <exclusions>
                <exclusion>
                    <groupId>org.slf4j</groupId>
                    <artifactId>*</artifactId>
                </exclusion>
                <exclusion>
                    <groupId>log4j</groupId>
                    <artifactId>*</artifactId>
                </exclusion>
            </exclusions>
        </dependency>
    </dependencies>

    <build>
        <plugins>
            <plugin>
                <groupId>org.apache.maven.plugins</groupId>
                <artifactId>maven-deploy-plugin</artifactId>
                <version>2.8.2</version>
                <configuration>
                    <skip>true</skip>
                </configuration>
            </plugin>
            <plugin>
                <groupId>org.apache.maven.plugins</groupId>
                <artifactId>maven-install-plugin</artifactId>
                <version>2.5.2</version>
                <configuration>
                    <skip>true</skip>
                </configuration>
            </plugin>
            <plugin>
                <groupId>org.codehaus.mojo</groupId>
                <artifactId>versions-maven-plugin</artifactId>
                <version>2.8.1</version>
                <configuration>
                    <generateBackupPoms>false</generateBackupPoms>
                </configuration>
            </plugin>
            <plugin>
                <groupId>org.apache.maven.plugins</groupId>
                <artifactId>maven-shade-plugin</artifactId>
                <version>3.2.4</version>
                <configuration>
                    <shadedArtifactAttached>false</shadedArtifactAttached>
                    <promoteTransitiveDependencies>true</promoteTransitiveDependencies>
                    <outputFile>${anso.jar}</outputFile>
                    <artifactSet>
                        <includes>
                            <include>*:*</include>
                        </includes>
                        <!-- everything listed here is provided by the cluster at runtime -->
                        <excludes>
                            <exclude>org.apache.hadoop:*</exclude>
                            <exclude>org.apache.spark:*</exclude>
                            <exclude>org.apache.hive:*</exclude>
                            <exclude>org.apache.hive.shims:*</exclude>
                            <exclude>org.apache.parquet:*</exclude>
                            <exclude>com.fasterxml.jackson:*</exclude>
                            <exclude>org.apache.commons:*</exclude>
                            <exclude>org.slf4j:*</exclude>
                            <exclude>log4j:log4j</exclude>
                            <exclude>commons-cli:commons-cli</exclude>
                            <exclude>commons-lang:commons-lang</exclude>
                            <exclude>org.apache.avro:*</exclude>
                            <exclude>org.apache.orc:*</exclude>
                            <exclude>org.glassfish:*</exclude>
                            <exclude>org.apache.zookeeper:*</exclude>
                            <exclude>com.google.guava:guava</exclude>
                            <exclude>com.google.protobuf:*</exclude>
                            <exclude>org.scala-lang:*</exclude>
                            <exclude>io.netty:*</exclude>
                        </excludes>
                    </artifactSet>
                    <filters>
                        <filter>
                            <artifact>*:*</artifact>
                            <excludes>
                                <exclude>org/datanucleus/**</exclude>
                                <!-- drop jar signatures; stale ones break the shaded jar -->
                                <exclude>META-INF/*.SF</exclude>
                                <exclude>META-INF/*.DSA</exclude>
                                <exclude>META-INF/*.RSA</exclude>
                                <exclude>META-INF/vfs-providers.xml</exclude>
                                <exclude>io/netty/**</exclude>
                            </excludes>
                        </filter>
                    </filters>
                </configuration>
                <executions>
                    <execution>
                        <phase>package</phase>
                        <goals>
                            <goal>shade</goal>
                        </goals>
                        <configuration>
                            <transformers>
                                <transformer
                                        implementation="org.apache.maven.plugins.shade.resource.ManifestResourceTransformer">
                                    <mainClass>com.anso.tess.Test1</mainClass>
                                </transformer>
                                <!-- keep environment-specific config out of the jar;
                                     the cluster's own copies are used instead -->
                                <transformer
                                        implementation="org.apache.maven.plugins.shade.resource.DontIncludeResourceTransformer">
                                    <resources>
                                        <resource>log4j.properties</resource>
                                        <resource>hdfs-site.xml</resource>
                                        <resource>hive-site.xml</resource>
                                        <resource>core-site.xml</resource>
                                    </resources>
                                </transformer>
                            </transformers>
                        </configuration>
                    </execution>
                </executions>
            </plugin>
        </plugins>
    </build>
    <!-- FIX: the closing tag was missing from the original paste -->
</project>
`
Code to reproduce:
`
package com.anso.tess

import org.apache.spark.sql.{DataFrame, Row, SaveMode, SparkSession}

/**
 * Minimal reproduction: builds a small in-memory DataFrame and writes it to a
 * ClickHouse table, either through Spark's JDBC writer ([[writeClickhouse]])
 * or through a raw prepared-statement batch ([[writeJDBCWrite]]).
 */
object Test1 {

  /**
   * Writes `dataFrame` to ClickHouse via Spark's JDBC data source.
   *
   * @param dataFrame rows to append
   * @param dbtable   fully-qualified target table, e.g. "dev.test1"
   */
  private def writeClickhouse(dataFrame: DataFrame, dbtable: String): Unit = {
    val prop = new java.util.Properties
    prop.setProperty("user", "default")
    prop.setProperty("password", "anso2022")
    prop.setProperty("driver", "com.clickhouse.jdbc.ClickHouseDriver")
    val url: String = "jdbc:clickhouse://dev32:8123"
    dataFrame
      .write
      .mode(SaveMode.Append)
      .option("batchsize", "10000")
      // ClickHouse does not support JDBC transactions
      .option("isolationLevel", "NONE")
      .option("numPartitions", "1")
      .jdbc(url, dbtable, prop)
  }

  /**
   * Writes `dataFrame` to ClickHouse through a manually batched
   * PreparedStatement. Collects to the driver — only for small test data.
   *
   * @param dataFrame rows (id: Int, name: String, num: Int) to insert
   * @param dbtable   fully-qualified target table
   */
  def writeJDBCWrite(dataFrame: DataFrame, dbtable: String): Unit = {
    val conn = ClickhouseUtils.getConn("dev32", 8123, "dev", "test1", "default", "anso2022")
    val sql: String =
      s"""
         |INSERT INTO $dbtable VALUES (?,?,?);
         |""".stripMargin
    val state = conn.prepareStatement(sql)
    try {
      dataFrame.collectAsList().forEach { (t: Row) =>
        // FIX: Row has no getAsInt/getAsString; use positional accessors
        state.setInt(1, t.getInt(0))
        state.setString(2, t.getString(1))
        state.setInt(3, t.getInt(2))
        state.addBatch()
      }
      state.executeBatch()
    } finally {
      // FIX: statement and connection were leaked in the original
      state.close()
      conn.close()
    }
  }

  // Explicit main instead of `extends App` (avoids delayed-init pitfalls);
  // the pom's <mainClass>com.anso.tess.Test1</mainClass> still resolves.
  def main(args: Array[String]): Unit = {
    val session: SparkSession = SparkSession.builder()
      // .master("local[*]")
      .appName(this.getClass.getName)
      .getOrCreate()
    session.sparkContext.setLogLevel("ERROR")

    import session.implicits._

    // FIX: removed the trailing comma after the last tuple — a syntax error in Scala 2.12
    val sourceDf = session.sparkContext.parallelize(Seq(
      (1, "Andy", 10), (2, "Andy1", 10), (3, "Andy2", 10), (4, "Andy3", 10),
      (5, "Andy3", 10), (6, "Andy3", 10), (7, "Andy3", 10), (8, "Andy3", 10),
      (9, "Andy3", 10), (10, "Andy3", 10), (11, "Andy3", 10), (12, "Andy3", 10),
      (13, "Andy3", 10), (14, "Andy3", 10), (15, "Andy3", 10), (16, "Andy3", 10),
      (17, "Andy3", 10), (18, "Andy3", 10), (19, "Andy3", 10), (20, "Andy3", 10),
      (21, "Andy3", 10)
    )).toDF("id", "name", "num")

    writeClickhouse(sourceDf, "dev.test1")
    // writeJDBCWrite(sourceDf, "dev.test1")
  }
}
`
Can you provide context and example to reproduce the issue?
We can communicate over WeChat if that is convenient for you.
Can you provide context and example to reproduce the issue?
Thanks very much for your reply. After further investigation, the problem has been resolved: when running on Spark 3.1.x, the configuration parameter
`spark.sql.defaultUrlStreamHandlerFactory.enabled=false` must be set to avoid this problem.
version:spark3.1.2
Caused by: java.lang.ClassCastException: org.apache.hadoop.fs.FsUrlConnection cannot be cast to java.net.HttpURLConnection at com.clickhouse.client.http.HttpUrlConnectionImpl.newConnection(HttpUrlConnectionImpl.java:86) at com.clickhouse.client.http.HttpUrlConnectionImpl.&lt;init&gt;(HttpUrlConnectionImpl.java:163)
at com.clickhouse.client.http.ClickHouseHttpConnectionFactory.createConnection(ClickHouseHttpConnectionFactory.java:12)
at com.clickhouse.client.http.ClickHouseHttpClient.newConnection(ClickHouseHttpClient.java:41)
at com.clickhouse.client.http.ClickHouseHttpClient.newConnection(ClickHouseHttpClient.java:23)
at com.clickhouse.client.AbstractClient.getConnection(AbstractClient.java:143)
at com.clickhouse.client.http.ClickHouseHttpClient.postRequest(ClickHouseHttpClient.java:74)
at com.clickhouse.client.http.ClickHouseHttpClient.execute(ClickHouseHttpClient.java:114)
at com.clickhouse.client.ClickHouseRequest.execute(ClickHouseRequest.java:1422)
at com.clickhouse.jdbc.internal.ClickHouseConnectionImpl.getServerInfo(ClickHouseConnectionImpl.java:77)