tensorflow / ecosystem

Integration of TensorFlow with other open-source frameworks
Apache License 2.0
1.37k stars 392 forks source link

Exception in thread "main" java.lang.ClassNotFoundException: Failed to find data source: tfrecords #152

Closed fuhailin closed 4 years ago

fuhailin commented 4 years ago

I want to use spark with scala to convert a dataFrame to TFRecord, Here is my example code TFRecordExample.scala:

import org.apache.spark.SparkConf
import org.apache.spark.sql.catalyst.expressions.GenericRow
import org.apache.spark.sql.types._
import org.apache.spark.sql.{DataFrame, Row, SparkSession}

object TFRecordExample {
  def main(args: Array[String]) {
    println("Hello, World!")
    val path = "test-output.tfrecord"
    val testRows: Array[Row] = Array(
      new GenericRow(Array[Any](11, 1, 23L, 10.0F, 14.0, List(1.0, 2.0), "r1")),
      new GenericRow(Array[Any](21, 2, 24L, 12.0F, 15.0, List(2.0, 2.0), "r2")))
    val schema = StructType(List(StructField("id", IntegerType),
      StructField("IntegerCol", IntegerType),
      StructField("LongCol", LongType),
      StructField("FloatCol", FloatType),
      StructField("DoubleCol", DoubleType),
      StructField("VectorCol", ArrayType(DoubleType, true)),
      StructField("StringCol", StringType)))

    // initialise spark context
    val conf = new SparkConf().setMaster("local[2]").setAppName(TFRecordExample.getClass.getName)
    val spark: SparkSession = SparkSession.builder.config(conf).getOrCreate()

    //  val spark = SparkSession.builder.appName("Simple Application").getOrCreate
    val rdd = spark.sparkContext.parallelize(testRows)

    //Save DataFrame as TFRecords
    val df: DataFrame = spark.createDataFrame(rdd, schema)
    df.show()

    df.write.format("tfrecords").option("recordType", "Example").save(path)

  }
}

here is my pom.xml

<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0"
         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
    <modelVersion>4.0.0</modelVersion>

    <groupId>org.example</groupId>
    <artifactId>TFRecordExample</artifactId>
    <version>1.0-SNAPSHOT</version>

    <dependencies>
        <dependency>
            <groupId>org.apache.spark</groupId>
            <artifactId>spark-core_2.12</artifactId>
            <version>2.4.5</version>
        </dependency>
        <dependency>
            <groupId>org.tensorflow</groupId>
            <artifactId>spark-tensorflow-connector_2.11</artifactId>
            <version>1.10.0</version>
        </dependency>
    </dependencies>
</project>

But the code get the following error:

/Library/Java/JavaVirtualMachines/adoptopenjdk-8.jdk/Contents/Home/bin/java "-javaagent:/Applications/IntelliJ IDEA.app/Contents/lib/idea_rt.jar=62755:/Applications/IntelliJ IDEA.app/Contents/bin" -Dfile.encoding=UTF-8 -classpath /Library/Java/JavaVirtualMachines/adoptopenjdk-8.jdk/Contents/Home/jre/lib/charsets.jar:/Library/Java/JavaVirtualMachines/adoptopenjdk-8.jdk/Contents/Home/jre/lib/ext/cldrdata.jar:/Library/Java/JavaVirtualMachines/adoptopenjdk-8.jdk/Contents/Home/jre/lib/ext/dnsns.jar:/Library/Java/JavaVirtualMachines/adoptopenjdk-8.jdk/Contents/Home/jre/lib/ext/jaccess.jar:/Library/Java/JavaVirtualMachines/adoptopenjdk-8.jdk/Contents/Home/jre/lib/ext/localedata.jar:/Library/Java/JavaVirtualMachines/adoptopenjdk-8.jdk/Contents/Home/jre/lib/ext/nashorn.jar:/Library/Java/JavaVirtualMachines/adoptopenjdk-8.jdk/Contents/Home/jre/lib/ext/sunec.jar:/Library/Java/JavaVirtualMachines/adoptopenjdk-8.jdk/Contents/Home/jre/lib/ext/sunjce_provider.jar:/Library/Java/JavaVirtualMachines/adoptopenjdk-8.jdk/Contents/Home/jre/lib/ext/sunpkcs11.jar:/Library/Java/JavaVirtualMachines/adoptopenjdk-8.jdk/Contents/Home/jre/lib/ext/zipfs.jar:/Library/Java/JavaVirtualMachines/adoptopenjdk-8.jdk/Contents/Home/jre/lib/jce.jar:/Library/Java/JavaVirtualMachines/adoptopenjdk-8.jdk/Contents/Home/jre/lib/jsse.jar:/Library/Java/JavaVirtualMachines/adoptopenjdk-8.jdk/Contents/Home/jre/lib/management-agent.jar:/Library/Java/JavaVirtualMachines/adoptopenjdk-8.jdk/Contents/Home/jre/lib/resources.jar:/Library/Java/JavaVirtualMachines/adoptopenjdk-8.jdk/Contents/Home/jre/lib/rt.jar:/Library/Java/JavaVirtualMachines/adoptopenjdk-8.jdk/Contents/Home/lib/dt.jar:/Library/Java/JavaVirtualMachines/adoptopenjdk-8.jdk/Contents/Home/lib/jconsole.jar:/Library/Java/JavaVirtualMachines/adoptopenjdk-8.jdk/Contents/Home/lib/sa-jdi.jar:/Library/Java/JavaVirtualMachines/adoptopenjdk-8.jdk/Contents/Home/lib/tools.jar:/Users/vincent/Documents/projects/show-me-scala-code/TFRecordExample/target/classes:/Users/vincent/opt/scala/scala-2.12.10/lib/scala-library.jar:/Users/vincent/opt/scala/scala-2.12.10/lib/scala-swing_2.12-2.0.3.jar:/Users/vincent/opt/scala/scala-2.12.10/lib/scala-reflect.jar:/Users/vincent/opt/scala/scala-2.12.10/lib/scala-parser-combinators_2.12-1.0.7.jar:/Users/vincent/opt/scala/scala-2.12.10/lib/scala-xml_2.12-1.0.6.jar:/Users/vincent/.m2/repository/org/apache/spark/spark-core_2.12/2.4.5/spark-core_2.12-2.4.5.jar:/Users/vincent/.m2/repository/com/thoughtworks/paranamer/paranamer/2.8/paranamer-2.8.jar:/Users/vincent/.m2/repository/org/apache/avro/avro/1.8.2/avro-1.8.2.jar:/Users/vincent/.m2/repository/org/codehaus/jackson/jackson-core-asl/1.9.13/jackson-core-asl-1.9.13.jar:/Users/vincent/.m2/repository/org/codehaus/jackson/jackson-mapper-asl/1.9.13/jackson-mapper-asl-1.9.13.jar:/Users/vincent/.m2/repository/org/apache/commons/commons-compress/1.8.1/commons-compress-1.8.1.jar:/Users/vincent/.m2/repository/org/tukaani/xz/1.5/xz-1.5.jar:/Users/vincent/.m2/repository/org/apache/avro/avro-mapred/1.8.2/avro-mapred-1.8.2-hadoop2.jar:/Users/vincent/.m2/repository/org/apache/avro/avro-ipc/1.8.2/avro-ipc-1.8.2.jar:/Users/vincent/.m2/repository/commons-codec/commons-codec/1.9/commons-codec-1.9.jar:/Users/vincent/.m2/repository/com/twitter/chill_2.12/0.9.3/chill_2.12-0.9.3.jar:/Users/vincent/.m2/repository/com/esotericsoftware/kryo-shaded/4.0.2/kryo-shaded-4.0.2.jar:/Users/vincent/.m2/repository/com/esotericsoftware/minlog/1.3.0/minlog-1.3.0.jar:/Users/vincent/.m2/repository/org/objenesis/objenesis/2.5.1/objenesis-2.5.1.jar:/Users/vincent/.m2/repository/com/twitter/chill-java/0.9.3/chill-java-0.9.3.jar:/Users/vincent/.m2/repository/org/apache/xbean/xbean-asm6-shaded/4.8/xbean-asm6-shaded-4.8.jar:/Users/vincent/.m2/repository/org/apache/hadoop/hadoop-client/2.6.5/hadoop-client-2.6.5.jar:/Users/vincent/.m2/repository/org/apache/hadoop/hadoop-common/2.6.5/hadoop-common-2.6.5.jar:/Users/vincent/.m2/repository/commons-cli/commons-cli/1.2/commons-cli-1.2.jar:/Users/vincent/.m2/repository/xmlenc/xmlenc/0.52/xmlenc-0.52.jar:/Users/vincent/.m2/repository/commons-httpclient/commons-httpclient/3.1/commons-httpclient-3.1.jar:/Users/vincent/.m2/repository/commons-io/commons-io/2.4/commons-io-2.4.jar:/Users/vincent/.m2/repository/commons-collections/commons-collections/3.2.2/commons-collections-3.2.2.jar:/Users/vincent/.m2/repository/commons-configuration/commons-configuration/1.6/commons-configuration-1.6.jar:/Users/vincent/.m2/repository/commons-digester/commons-digester/1.8/commons-digester-1.8.jar:/Users/vincent/.m2/repository/commons-beanutils/commons-beanutils/1.7.0/commons-beanutils-1.7.0.jar:/Users/vincent/.m2/repository/com/google/code/gson/gson/2.2.4/gson-2.2.4.jar:/Users/vincent/.m2/repository/org/apache/hadoop/hadoop-auth/2.6.5/hadoop-auth-2.6.5.jar:/Users/vincent/.m2/repository/org/apache/httpcomponents/httpclient/4.2.5/httpclient-4.2.5.jar:/Users/vincent/.m2/repository/org/apache/httpcomponents/httpcore/4.2.4/httpcore-4.2.4.jar:/Users/vincent/.m2/repository/org/apache/directory/server/apacheds-kerberos-codec/2.0.0-M15/apacheds-kerberos-codec-2.0.0-M15.jar:/Users/vincent/.m2/repository/org/apache/directory/server/apacheds-i18n/2.0.0-M15/apacheds-i18n-2.0.0-M15.jar:/Users/vincent/.m2/repository/org/apache/directory/api/api-asn1-api/1.0.0-M20/api-asn1-api-1.0.0-M20.jar:/Users/vincent/.m2/repository/org/apache/directory/api/api-util/1.0.0-M20/api-util-1.0.0-M20.jar:/Users/vincent/.m2/repository/org/apache/curator/curator-client/2.6.0/curator-client-2.6.0.jar:/Users/vincent/.m2/repository/org/htrace/htrace-core/3.0.4/htrace-core-3.0.4.jar:/Users/vincent/.m2/repository/org/apache/hadoop/hadoop-hdfs/2.6.5/hadoop-hdfs-2.6.5.jar:/Users/vincent/.m2/repository/org/mortbay/jetty/jetty-util/6.1.26/jetty-util-6.1.26.jar:/Users/vincent/.m2/repository/xerces/xercesImpl/2.9.1/xercesImpl-2.9.1.jar:/Users/vincent/.m2/repository/xml-apis/xml-apis/1.3.04/xml-apis-1.3.04.jar:/Users/vincent/.m2/repository/org/apache/hadoop/hadoop-mapreduce-client-app/2.6.5/hadoop-mapreduce-client-app-2.6.5.jar:/Users/vincent/.m2/repository/org/apache/hadoop/hadoop-mapreduce-client-common/2.6.5/hadoop-mapreduce-client-common-2.6.5.jar:/Users/vincent/.m2/repository/org/apache/hadoop/hadoop-yarn-client/2.6.5/hadoop-yarn-client-2.6.5.jar:/Users/vincent/.m2/repository/org/apache/hadoop/hadoop-yarn-server-common/2.6.5/hadoop-yarn-server-common-2.6.5.jar:/Users/vincent/.m2/repository/org/apache/hadoop/hadoop-mapreduce-client-shuffle/2.6.5/hadoop-mapreduce-client-shuffle-2.6.5.jar:/Users/vincent/.m2/repository/org/apache/hadoop/hadoop-yarn-api/2.6.5/hadoop-yarn-api-2.6.5.jar:/Users/vincent/.m2/repository/org/apache/hadoop/hadoop-mapreduce-client-core/2.6.5/hadoop-mapreduce-client-core-2.6.5.jar:/Users/vincent/.m2/repository/org/apache/hadoop/hadoop-yarn-common/2.6.5/hadoop-yarn-common-2.6.5.jar:/Users/vincent/.m2/repository/javax/xml/bind/jaxb-api/2.2.2/jaxb-api-2.2.2.jar:/Users/vincent/.m2/repository/javax/xml/stream/stax-api/1.0-2/stax-api-1.0-2.jar:/Users/vincent/.m2/repository/org/codehaus/jackson/jackson-jaxrs/1.9.13/jackson-jaxrs-1.9.13.jar:/Users/vincent/.m2/repository/org/codehaus/jackson/jackson-xc/1.9.13/jackson-xc-1.9.13.jar:/Users/vincent/.m2/repository/org/apache/hadoop/hadoop-mapreduce-client-jobclient/2.6.5/hadoop-mapreduce-client-jobclient-2.6.5.jar:/Users/vincent/.m2/repository/org/apache/hadoop/hadoop-annotations/2.6.5/hadoop-annotations-2.6.5.jar:/Users/vincent/.m2/repository/org/apache/spark/spark-launcher_2.12/2.4.5/spark-launcher_2.12-2.4.5.jar:/Users/vincent/.m2/repository/org/apache/spark/spark-kvstore_2.12/2.4.5/spark-kvstore_2.12-2.4.5.jar:/Users/vincent/.m2/repository/org/fusesource/leveldbjni/leveldbjni-all/1.8/leveldbjni-all-1.8.jar:/Users/vincent/.m2/repository/com/fasterxml/jackson/core/jackson-core/2.6.7/jackson-core-2.6.7.jar:/Users/vincent/.m2/repository/com/fasterxml/jackson/core/jackson-annotations/2.6.7/jackson-annotations-2.6.7.jar:/Users/vincent/.m2/repository/org/apache/spark/spark-network-common_2.12/2.4.5/spark-network-common_2.12-2.4.5.jar:/Users/vincent/.m2/repository/org/apache/spark/spark-network-shuffle_2.12/2.4.5/spark-network-shuffle_2.12-2.4.5.jar:/Users/vincent/.m2/repository/org/apache/spark/spark-unsafe_2.12/2.4.5/spark-unsafe_2.12-2.4.5.jar:/Users/vincent/.m2/repository/javax/activation/activation/1.1.1/activation-1.1.1.jar:/Users/vincent/.m2/repository/org/apache/curator/curator-recipes/2.6.0/curator-recipes-2.6.0.jar:/Users/vincent/.m2/repository/org/apache/curator/curator-framework/2.6.0/curator-framework-2.6.0.jar:/Users/vincent/.m2/repository/com/google/guava/guava/16.0.1/guava-16.0.1.jar:/Users/vincent/.m2/repository/org/apache/zookeeper/zookeeper/3.4.6/zookeeper-3.4.6.jar:/Users/vincent/.m2/repository/javax/servlet/javax.servlet-api/3.1.0/javax.servlet-api-3.1.0.jar:/Users/vincent/.m2/repository/org/apache/commons/commons-lang3/3.5/commons-lang3-3.5.jar:/Users/vincent/.m2/repository/org/apache/commons/commons-math3/3.4.1/commons-math3-3.4.1.jar:/Users/vincent/.m2/repository/com/google/code/findbugs/jsr305/1.3.9/jsr305-1.3.9.jar:/Users/vincent/.m2/repository/org/slf4j/slf4j-api/1.7.16/slf4j-api-1.7.16.jar:/Users/vincent/.m2/repository/org/slf4j/jul-to-slf4j/1.7.16/jul-to-slf4j-1.7.16.jar:/Users/vincent/.m2/repository/org/slf4j/jcl-over-slf4j/1.7.16/jcl-over-slf4j-1.7.16.jar:/Users/vincent/.m2/repository/log4j/log4j/1.2.17/log4j-1.2.17.jar:/Users/vincent/.m2/repository/org/slf4j/slf4j-log4j12/1.7.16/slf4j-log4j12-1.7.16.jar:/Users/vincent/.m2/repository/com/ning/compress-lzf/1.0.3/compress-lzf-1.0.3.jar:/Users/vincent/.m2/repository/org/xerial/snappy/snappy-java/1.1.7.3/snappy-java-1.1.7.3.jar:/Users/vincent/.m2/repository/org/lz4/lz4-java/1.4.0/lz4-java-1.4.0.jar:/Users/vincent/.m2/repository/com/github/luben/zstd-jni/1.3.2-2/zstd-jni-1.3.2-2.jar:/Users/vincent/.m2/repository/org/roaringbitmap/RoaringBitmap/0.7.45/RoaringBitmap-0.7.45.jar:/Users/vincent/.m2/repository/org/roaringbitmap/shims/0.7.45/shims-0.7.45.jar:/Users/vincent/.m2/repository/commons-net/commons-net/3.1/commons-net-3.1.jar:/Users/vincent/.m2/repository/org/scala-lang/scala-library/2.12.10/scala-library-2.12.10.jar:/Users/vincent/.m2/repository/org/json4s/json4s-jackson_2.12/3.5.3/json4s-jackson_2.12-3.5.3.jar:/Users/vincent/.m2/repository/org/json4s/json4s-core_2.12/3.5.3/json4s-core_2.12-3.5.3.jar:/Users/vincent/.m2/repository/org/json4s/json4s-ast_2.12/3.5.3/json4s-ast_2.12-3.5.3.jar:/Users/vincent/.m2/repository/org/json4s/json4s-scalap_2.12/3.5.3/json4s-scalap_2.12-3.5.3.jar:/Users/vincent/.m2/repository/org/scala-lang/modules/scala-xml_2.12/1.0.6/scala-xml_2.12-1.0.6.jar:/Users/vincent/.m2/repository/org/glassfish/jersey/core/jersey-client/2.22.2/jersey-client-2.22.2.jar:/Users/vincent/.m2/repository/javax/ws/rs/javax.ws.rs-api/2.0.1/javax.ws.rs-api-2.0.1.jar:/Users/vincent/.m2/repository/org/glassfish/hk2/hk2-api/2.4.0-b34/hk2-api-2.4.0-b34.jar:/Users/vincent/.m2/repository/org/glassfish/hk2/hk2-utils/2.4.0-b34/hk2-utils-2.4.0-b34.jar:/Users/vincent/.m2/repository/org/glassfish/hk2/external/aopalliance-repackaged/2.4.0-b34/aopalliance-repackaged-2.4.0-b34.jar:/Users/vincent/.m2/repository/org/glassfish/hk2/external/javax.inject/2.4.0-b34/javax.inject-2.4.0-b34.jar:/Users/vincent/.m2/repository/org/glassfish/hk2/hk2-locator/2.4.0-b34/hk2-locator-2.4.0-b34.jar:/Users/vincent/.m2/repository/org/javassist/javassist/3.18.1-GA/javassist-3.18.1-GA.jar:/Users/vincent/.m2/repository/org/glassfish/jersey/core/jersey-common/2.22.2/jersey-common-2.22.2.jar:/Users/vincent/.m2/repository/javax/annotation/javax.annotation-api/1.2/javax.annotation-api-1.2.jar:/Users/vincent/.m2/repository/org/glassfish/jersey/bundles/repackaged/jersey-guava/2.22.2/jersey-guava-2.22.2.jar:/Users/vincent/.m2/repository/org/glassfish/hk2/osgi-resource-locator/1.0.1/osgi-resource-locator-1.0.1.jar:/Users/vincent/.m2/repository/org/glassfish/jersey/core/jersey-server/2.22.2/jersey-server-2.22.2.jar:/Users/vincent/.m2/repository/org/glassfish/jersey/media/jersey-media-jaxb/2.22.2/jersey-media-jaxb-2.22.2.jar:/Users/vincent/.m2/repository/javax/validation/validation-api/1.1.0.Final/validation-api-1.1.0.Final.jar:/Users/vincent/.m2/repository/org/glassfish/jersey/containers/jersey-container-servlet/2.22.2/jersey-container-servlet-2.22.2.jar:/Users/vincent/.m2/repository/org/glassfish/jersey/containers/jersey-container-servlet-core/2.22.2/jersey-container-servlet-core-2.22.2.jar:/Users/vincent/.m2/repository/io/netty/netty-all/4.1.42.Final/netty-all-4.1.42.Final.jar:/Users/vincent/.m2/repository/io/netty/netty/3.9.9.Final/netty-3.9.9.Final.jar:/Users/vincent/.m2/repository/com/clearspring/analytics/stream/2.7.0/stream-2.7.0.jar:/Users/vincent/.m2/repository/io/dropwizard/metrics/metrics-core/3.1.5/metrics-core-3.1.5.jar:/Users/vincent/.m2/repository/io/dropwizard/metrics/metrics-jvm/3.1.5/metrics-jvm-3.1.5.jar:/Users/vincent/.m2/repository/io/dropwizard/metrics/metrics-json/3.1.5/metrics-json-3.1.5.jar:/Users/vincent/.m2/repository/io/dropwizard/metrics/metrics-graphite/3.1.5/metrics-graphite-3.1.5.jar:/Users/vincent/.m2/repository/com/fasterxml/jackson/core/jackson-databind/2.6.7.3/jackson-databind-2.6.7.3.jar:/Users/vincent/.m2/repository/com/fasterxml/jackson/module/jackson-module-scala_2.12/2.6.7.1/jackson-module-scala_2.12-2.6.7.1.jar:/Users/vincent/.m2/repository/org/scala-lang/scala-reflect/2.12.1/scala-reflect-2.12.1.jar:/Users/vincent/.m2/repository/com/fasterxml/jackson/module/jackson-module-paranamer/2.7.9/jackson-module-paranamer-2.7.9.jar:/Users/vincent/.m2/repository/org/apache/ivy/ivy/2.4.0/ivy-2.4.0.jar:/Users/vincent/.m2/repository/oro/oro/2.0.8/oro-2.0.8.jar:/Users/vincent/.m2/repository/net/razorvine/pyrolite/4.13/pyrolite-4.13.jar:/Users/vincent/.m2/repository/net/sf/py4j/py4j/0.10.7/py4j-0.10.7.jar:/Users/vincent/.m2/repository/org/apache/spark/spark-tags_2.12/2.4.5/spark-tags_2.12-2.4.5.jar:/Users/vincent/.m2/repository/org/apache/commons/commons-crypto/1.0.0/commons-crypto-1.0.0.jar:/Users/vincent/.m2/repository/org/spark-project/spark/unused/1.0.0/unused-1.0.0.jar:/Users/vincent/.m2/repository/org/apache/spark/spark-streaming_2.12/2.4.5/spark-streaming_2.12-2.4.5.jar:/Users/vincent/.m2/repository/org/apache/spark/spark-sql_2.12/2.4.5/spark-sql_2.12-2.4.5.jar:/Users/vincent/.m2/repository/com/univocity/univocity-parsers/2.7.3/univocity-parsers-2.7.3.jar:/Users/vincent/.m2/repository/org/apache/spark/spark-sketch_2.12/2.4.5/spark-sketch_2.12-2.4.5.jar:/Users/vincent/.m2/repository/org/apache/spark/spark-catalyst_2.12/2.4.5/spark-catalyst_2.12-2.4.5.jar:/Users/vincent/.m2/repository/org/scala-lang/modules/scala-parser-combinators_2.12/1.1.0/scala-parser-combinators_2.12-1.1.0.jar:/Users/vincent/.m2/repository/org/codehaus/janino/janino/3.0.9/janino-3.0.9.jar:/Users/vincent/.m2/repository/org/codehaus/janino/commons-compiler/3.0.9/commons-compiler-3.0.9.jar:/Users/vincent/.m2/repository/org/antlr/antlr4-runtime/4.7/antlr4-runtime-4.7.jar:/Users/vincent/.m2/repository/org/apache/orc/orc-core/1.5.5/orc-core-1.5.5-nohive.jar:/Users/vincent/.m2/repository/org/apache/orc/orc-shims/1.5.5/orc-shims-1.5.5.jar:/Users/vincent/.m2/repository/com/google/protobuf/protobuf-java/2.5.0/protobuf-java-2.5.0.jar:/Users/vincent/.m2/repository/commons-lang/commons-lang/2.6/commons-lang-2.6.jar:/Users/vincent/.m2/repository/io/airlift/aircompressor/0.10/aircompressor-0.10.jar:/Users/vincent/.m2/repository/org/apache/orc/orc-mapreduce/1.5.5/orc-mapreduce-1.5.5-nohive.jar:/Users/vincent/.m2/repository/org/apache/parquet/parquet-column/1.10.1/parquet-column-1.10.1.jar:/Users/vincent/.m2/repository/org/apache/parquet/parquet-common/1.10.1/parquet-common-1.10.1.jar:/Users/vincent/.m2/repository/org/apache/parquet/parquet-encoding/1.10.1/parquet-encoding-1.10.1.jar:/Users/vincent/.m2/repository/org/apache/parquet/parquet-hadoop/1.10.1/parquet-hadoop-1.10.1.jar:/Users/vincent/.m2/repository/org/apache/parquet/parquet-format/2.4.0/parquet-format-2.4.0.jar:/Users/vincent/.m2/repository/org/apache/parquet/parquet-jackson/1.10.1/parquet-jackson-1.10.1.jar:/Users/vincent/.m2/repository/org/apache/arrow/arrow-vector/0.10.0/arrow-vector-0.10.0.jar:/Users/vincent/.m2/repository/org/apache/arrow/arrow-format/0.10.0/arrow-format-0.10.0.jar:/Users/vincent/.m2/repository/org/apache/arrow/arrow-memory/0.10.0/arrow-memory-0.10.0.jar:/Users/vincent/.m2/repository/joda-time/joda-time/2.9.9/joda-time-2.9.9.jar:/Users/vincent/.m2/repository/com/carrotsearch/hppc/0.7.2/hppc-0.7.2.jar:/Users/vincent/.m2/repository/com/vlkan/flatbuffers/1.2.0-3f79e055/flatbuffers-1.2.0-3f79e055.jar TFRecordExample --jars src/spark-tensorflow-connector_2.11-1.10.0.jar
Hello, World!
Using Spark's default log4j profile: org/apache/spark/log4j-defaults.properties
20/03/12 22:10:54 WARN Utils: Your hostname, zhaodabaos-MacBook-Pro.local resolves to a loopback address: 127.0.0.1; using 192.168.0.102 instead (on interface en0)
20/03/12 22:10:54 WARN Utils: Set SPARK_LOCAL_IP if you need to bind to another address
20/03/12 22:10:54 INFO SparkContext: Running Spark version 2.4.5
20/03/12 22:10:54 WARN NativeCodeLoader: Unable to load native-hadoop library for your platform... using builtin-java classes where applicable
20/03/12 22:10:54 INFO SparkContext: Submitted application: TFRecordExample$
20/03/12 22:10:54 INFO SecurityManager: Changing view acls to: vincent
20/03/12 22:10:54 INFO SecurityManager: Changing modify acls to: vincent
20/03/12 22:10:54 INFO SecurityManager: Changing view acls groups to: 
20/03/12 22:10:54 INFO SecurityManager: Changing modify acls groups to: 
20/03/12 22:10:54 INFO SecurityManager: SecurityManager: authentication disabled; ui acls disabled; users  with view permissions: Set(vincent); groups with view permissions: Set(); users  with modify permissions: Set(vincent); groups with modify permissions: Set()
20/03/12 22:10:55 INFO Utils: Successfully started service 'sparkDriver' on port 62766.
20/03/12 22:10:55 INFO SparkEnv: Registering MapOutputTracker
20/03/12 22:10:55 INFO SparkEnv: Registering BlockManagerMaster
20/03/12 22:10:55 INFO BlockManagerMasterEndpoint: Using org.apache.spark.storage.DefaultTopologyMapper for getting topology information
20/03/12 22:10:55 INFO BlockManagerMasterEndpoint: BlockManagerMasterEndpoint up
20/03/12 22:10:55 INFO DiskBlockManager: Created local directory at /private/var/folders/y_/776qjjb16ms4wyghx_6jrrjw0000gn/T/blockmgr-2d72fcf7-2085-4b35-ab30-e7596011d48f
20/03/12 22:10:55 INFO MemoryStore: MemoryStore started with capacity 2004.6 MB
20/03/12 22:10:55 INFO SparkEnv: Registering OutputCommitCoordinator
20/03/12 22:10:55 INFO Utils: Successfully started service 'SparkUI' on port 4040.
20/03/12 22:10:55 INFO SparkUI: Bound SparkUI to 0.0.0.0, and started at http://192.168.0.102:4040
20/03/12 22:10:55 INFO Executor: Starting executor ID driver on host localhost
20/03/12 22:10:55 INFO Utils: Successfully started service 'org.apache.spark.network.netty.NettyBlockTransferService' on port 62767.
20/03/12 22:10:55 INFO NettyBlockTransferService: Server created on 192.168.0.102:62767
20/03/12 22:10:55 INFO BlockManager: Using org.apache.spark.storage.RandomBlockReplicationPolicy for block replication policy
20/03/12 22:10:55 INFO BlockManagerMaster: Registering BlockManager BlockManagerId(driver, 192.168.0.102, 62767, None)
20/03/12 22:10:55 INFO BlockManagerMasterEndpoint: Registering block manager 192.168.0.102:62767 with 2004.6 MB RAM, BlockManagerId(driver, 192.168.0.102, 62767, None)
20/03/12 22:10:55 INFO BlockManagerMaster: Registered BlockManager BlockManagerId(driver, 192.168.0.102, 62767, None)
20/03/12 22:10:55 INFO BlockManager: Initialized BlockManager: BlockManagerId(driver, 192.168.0.102, 62767, None)
20/03/12 22:10:57 INFO SharedState: Setting hive.metastore.warehouse.dir ('null') to the value of spark.sql.warehouse.dir ('file:/Users/vincent/Documents/projects/show-me-scala-code/TFRecordExample/spark-warehouse').
20/03/12 22:10:57 INFO SharedState: Warehouse path is 'file:/Users/vincent/Documents/projects/show-me-scala-code/TFRecordExample/spark-warehouse'.
20/03/12 22:10:57 INFO StateStoreCoordinatorRef: Registered StateStoreCoordinator endpoint
20/03/12 22:10:58 INFO CodeGenerator: Code generated in 142.026826 ms
20/03/12 22:10:58 INFO CodeGenerator: Code generated in 25.187387 ms
20/03/12 22:10:58 INFO SparkContext: Starting job: show at TFRecordExample.scala:32
20/03/12 22:10:58 INFO DAGScheduler: Got job 0 (show at TFRecordExample.scala:32) with 1 output partitions
20/03/12 22:10:58 INFO DAGScheduler: Final stage: ResultStage 0 (show at TFRecordExample.scala:32)
20/03/12 22:10:58 INFO DAGScheduler: Parents of final stage: List()
20/03/12 22:10:58 INFO DAGScheduler: Missing parents: List()
20/03/12 22:10:58 INFO DAGScheduler: Submitting ResultStage 0 (MapPartitionsRDD[5] at show at TFRecordExample.scala:32), which has no missing parents
20/03/12 22:10:58 INFO MemoryStore: Block broadcast_0 stored as values in memory (estimated size 20.0 KB, free 2004.6 MB)
20/03/12 22:10:58 INFO MemoryStore: Block broadcast_0_piece0 stored as bytes in memory (estimated size 8.5 KB, free 2004.6 MB)
20/03/12 22:10:58 INFO BlockManagerInfo: Added broadcast_0_piece0 in memory on 192.168.0.102:62767 (size: 8.5 KB, free: 2004.6 MB)
20/03/12 22:10:58 INFO SparkContext: Created broadcast 0 from broadcast at DAGScheduler.scala:1163
20/03/12 22:10:58 INFO DAGScheduler: Submitting 1 missing tasks from ResultStage 0 (MapPartitionsRDD[5] at show at TFRecordExample.scala:32) (first 15 tasks are for partitions Vector(0))
20/03/12 22:10:58 INFO TaskSchedulerImpl: Adding task set 0.0 with 1 tasks
20/03/12 22:10:58 INFO TaskSetManager: Starting task 0.0 in stage 0.0 (TID 0, localhost, executor driver, partition 0, PROCESS_LOCAL, 7812 bytes)
20/03/12 22:10:58 INFO Executor: Running task 0.0 in stage 0.0 (TID 0)
20/03/12 22:10:58 INFO CodeGenerator: Code generated in 13.494212 ms
20/03/12 22:10:59 INFO CodeGenerator: Code generated in 32.2272 ms
20/03/12 22:10:59 INFO Executor: Finished task 0.0 in stage 0.0 (TID 0). 1204 bytes result sent to driver
20/03/12 22:10:59 INFO TaskSetManager: Finished task 0.0 in stage 0.0 (TID 0) in 388 ms on localhost (executor driver) (1/1)
20/03/12 22:10:59 INFO TaskSchedulerImpl: Removed TaskSet 0.0, whose tasks have all completed, from pool 
20/03/12 22:10:59 INFO DAGScheduler: ResultStage 0 (show at TFRecordExample.scala:32) finished in 0.649 s
20/03/12 22:10:59 INFO DAGScheduler: Job 0 finished: show at TFRecordExample.scala:32, took 0.682568 s
20/03/12 22:10:59 INFO SparkContext: Starting job: show at TFRecordExample.scala:32
20/03/12 22:10:59 INFO DAGScheduler: Got job 1 (show at TFRecordExample.scala:32) with 1 output partitions
20/03/12 22:10:59 INFO DAGScheduler: Final stage: ResultStage 1 (show at TFRecordExample.scala:32)
20/03/12 22:10:59 INFO DAGScheduler: Parents of final stage: List()
20/03/12 22:10:59 INFO DAGScheduler: Missing parents: List()
20/03/12 22:10:59 INFO DAGScheduler: Submitting ResultStage 1 (MapPartitionsRDD[5] at show at TFRecordExample.scala:32), which has no missing parents
20/03/12 22:10:59 INFO MemoryStore: Block broadcast_1 stored as values in memory (estimated size 20.0 KB, free 2004.6 MB)
20/03/12 22:10:59 INFO MemoryStore: Block broadcast_1_piece0 stored as bytes in memory (estimated size 8.5 KB, free 2004.5 MB)
20/03/12 22:10:59 INFO BlockManagerInfo: Added broadcast_1_piece0 in memory on 192.168.0.102:62767 (size: 8.5 KB, free: 2004.6 MB)
20/03/12 22:10:59 INFO SparkContext: Created broadcast 1 from broadcast at DAGScheduler.scala:1163
20/03/12 22:10:59 INFO DAGScheduler: Submitting 1 missing tasks from ResultStage 1 (MapPartitionsRDD[5] at show at TFRecordExample.scala:32) (first 15 tasks are for partitions Vector(1))
20/03/12 22:10:59 INFO TaskSchedulerImpl: Adding task set 1.0 with 1 tasks
20/03/12 22:10:59 INFO TaskSetManager: Starting task 0.0 in stage 1.0 (TID 1, localhost, executor driver, partition 1, PROCESS_LOCAL, 7812 bytes)
20/03/12 22:10:59 INFO Executor: Running task 0.0 in stage 1.0 (TID 1)
20/03/12 22:10:59 INFO Executor: Finished task 0.0 in stage 1.0 (TID 1). 1163 bytes result sent to driver
20/03/12 22:10:59 INFO TaskSetManager: Finished task 0.0 in stage 1.0 (TID 1) in 22 ms on localhost (executor driver) (1/1)
20/03/12 22:10:59 INFO TaskSchedulerImpl: Removed TaskSet 1.0, whose tasks have all completed, from pool 
20/03/12 22:10:59 INFO DAGScheduler: ResultStage 1 (show at TFRecordExample.scala:32) finished in 0.029 s
20/03/12 22:10:59 INFO DAGScheduler: Job 1 finished: show at TFRecordExample.scala:32, took 0.031846 s
+---+----------+-------+--------+---------+----------+---------+
| id|IntegerCol|LongCol|FloatCol|DoubleCol| VectorCol|StringCol|
+---+----------+-------+--------+---------+----------+---------+
| 11|         1|     23|    10.0|     14.0|[1.0, 2.0]|       r1|
| 21|         2|     24|    12.0|     15.0|[2.0, 2.0]|       r2|
+---+----------+-------+--------+---------+----------+---------+

Exception in thread "main" java.lang.ClassNotFoundException: Failed to find data source: tfrecords. Please find packages at http://spark.apache.org/third-party-projects.html
    at org.apache.spark.sql.execution.datasources.DataSource$.lookupDataSource(DataSource.scala:660)
    at org.apache.spark.sql.DataFrameWriter.save(DataFrameWriter.scala:245)
    at org.apache.spark.sql.DataFrameWriter.save(DataFrameWriter.scala:229)
    at TFRecordExample$.main(TFRecordExample.scala:34)
    at TFRecordExample.main(TFRecordExample.scala)
Caused by: java.lang.ClassNotFoundException: tfrecords.DefaultSource
    at java.net.URLClassLoader.findClass(URLClassLoader.java:382)
    at java.lang.ClassLoader.loadClass(ClassLoader.java:419)
    at sun.misc.Launcher$AppClassLoader.loadClass(Launcher.java:352)
    at java.lang.ClassLoader.loadClass(ClassLoader.java:352)
    at org.apache.spark.sql.execution.datasources.DataSource$.$anonfun$lookupDataSource$5(DataSource.scala:634)
    at scala.util.Try$.apply(Try.scala:213)
    at org.apache.spark.sql.execution.datasources.DataSource$.$anonfun$lookupDataSource$4(DataSource.scala:634)
    at scala.util.Failure.orElse(Try.scala:224)
    at org.apache.spark.sql.execution.datasources.DataSource$.lookupDataSource(DataSource.scala:634)
    ... 4 more
20/03/12 22:10:59 INFO SparkContext: Invoking stop() from shutdown hook
20/03/12 22:10:59 INFO SparkUI: Stopped Spark web UI at http://192.168.0.102:4040
20/03/12 22:10:59 INFO MapOutputTrackerMasterEndpoint: MapOutputTrackerMasterEndpoint stopped!
20/03/12 22:10:59 INFO MemoryStore: MemoryStore cleared
20/03/12 22:10:59 INFO BlockManager: BlockManager stopped
20/03/12 22:10:59 INFO BlockManagerMaster: BlockManagerMaster stopped
20/03/12 22:10:59 INFO OutputCommitCoordinator$OutputCommitCoordinatorEndpoint: OutputCommitCoordinator stopped!
20/03/12 22:10:59 INFO SparkContext: Successfully stopped SparkContext
20/03/12 22:10:59 INFO ShutdownHookManager: Shutdown hook called
20/03/12 22:10:59 INFO ShutdownHookManager: Deleting directory /private/var/folders/y_/776qjjb16ms4wyghx_6jrrjw0000gn/T/spark-7895f927-6732-4f4f-a838-5708e9a7e0f3

Process finished with exit code 1

My Spark version is 2.4.5

sdkramer10 commented 4 years ago

@fuhailin How did you resolve this error? I am seeing the same issue.

pjonnalahub commented 3 years ago

any update on this issue? I was able to run the code in spark-shell but getting an issue in IntelliJ. Please share if you resolved

zuston commented 3 years ago

Get the same exception. Any update?

fay111101 commented 3 years ago

Get the same exception. Any update?