lucidworks / spark-solr

Tools for reading data from Solr as a Spark RDD and indexing objects from Spark into Solr using SolrJ.
Apache License 2.0

py4j.protocol.Py4JJavaError: An error occurred while calling o175.save. #323

Open salimuddin87 opened 3 years ago

salimuddin87 commented 3 years ago

Here is my code in PySpark 2.4.5, where I am trying to write a DataFrame to Solr:

```python
solr_conf = dict()

solr_conf["zkhost"] = index_config.SOLR_ZK_HOST
solr_conf["collection"] = solr_collection
solr_conf["batch_size"] = "100"

result_df.write.format("solr").options(**solr_conf).mode("overwrite").save()
```
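For anyone trying to reproduce this, a self-contained version of the snippet might look like the sketch below. The SparkSession setup, the sample rows, and the `zkhost`/`collection` values are assumptions filled in for illustration (the collection name is taken from the stack trace further down), not from the original job:

```python
from pyspark.sql import SparkSession

# Minimal sketch, assuming the spark-solr jar is on the classpath and a
# SolrCloud cluster whose ZooKeeper listens on localhost:9983.
spark = SparkSession.builder.appName("solr-write-sketch").getOrCreate()

# Hypothetical sample data standing in for result_df.
result_df = spark.createDataFrame(
    [("1", "Germany"), ("2", "France")],
    ["id", "country_s"],
)

solr_conf = {
    "zkhost": "localhost:9983",    # assumption: stands in for index_config.SOLR_ZK_HOST
    "collection": "country-test",  # collection name seen in the stack trace below
    "batch_size": "100",
}

result_df.write.format("solr").options(**solr_conf).mode("overwrite").save()
```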

File "/local/apps/tmp/hadoop-egsadmin/nm-local-dir/usercache/misearch/appcache/application_1623714730645_0418/container_e19_1623714730645_0418_01_000001/pyspark.zip/pyspark/sql/readwriter.py", line 737, in save File "/local/apps/tmp/hadoop-egsadmin/nm-local-dir/usercache/misearch/appcache/application_1623714730645_0418/container_e19_1623714730645_0418_01_000001/py4j-0.10.7-src.zip/py4j/java_gateway.py", line 1257, in call File "/local/apps/tmp/hadoop-egsadmin/nm-local-dir/usercache/misearch/appcache/application_1623714730645_0418/container_e19_1623714730645_0418_01_000001/pyspark.zip/pyspark/sql/utils.py", line 63, in deco File "/local/apps/tmp/hadoop-egsadmin/nm-local-dir/usercache/misearch/appcache/application_1623714730645_0418/container_e19_1623714730645_0418_01_000001/py4j-0.10.7-src.zip/py4j/protocol.py", line 328, in get_return_value py4j.protocol.Py4JJavaError: An error occurred while calling o175.save. : org.apache.solr.client.solrj.impl.HttpSolrClient$RemoteExecutionException: Error from server at http://localhost:8983/solr/country-test: error processing commands at org.apache.solr.client.solrj.impl.HttpSolrClient$RemoteExecutionException.create(HttpSolrClient.java:829) at org.apache.solr.client.solrj.impl.HttpSolrClient.executeMethod(HttpSolrClient.java:620) at org.apache.solr.client.solrj.impl.HttpSolrClient.request(HttpSolrClient.java:255) at org.apache.solr.client.solrj.impl.HttpSolrClient.request(HttpSolrClient.java:244) at org.apache.solr.client.solrj.impl.LBHttpSolrClient.doRequest(LBHttpSolrClient.java:483) at org.apache.solr.client.solrj.impl.LBHttpSolrClient.request(LBHttpSolrClient.java:413) at org.apache.solr.client.solrj.impl.CloudSolrClient.sendRequest(CloudSolrClient.java:1106) at org.apache.solr.client.solrj.impl.CloudSolrClient.requestWithRetryOnStaleState(CloudSolrClient.java:886) at org.apache.solr.client.solrj.impl.CloudSolrClient.request(CloudSolrClient.java:819) at org.apache.solr.client.solrj.SolrRequest.process(SolrRequest.java:194) at com.lucidworks.spark.SolrRelation$.addFieldsForInsert(SolrRelation.scala:970) at com.lucidworks.spark.SolrRelation.insert(SolrRelation.scala:674) at solr.DefaultSource.createRelation(DefaultSource.scala:27) at org.apache.spark.sql.execution.datasources.SaveIntoDataSourceCommand.run(SaveIntoDataSourceCommand.scala:45) at org.apache.spark.sql.execution.command.ExecutedCommandExec.sideEffectResult$lzycompute(commands.scala:70) at org.apache.spark.sql.execution.command.ExecutedCommandExec.sideEffectResult(commands.scala:68) at org.apache.spark.sql.execution.command.ExecutedCommandExec.doExecute(commands.scala:86) at org.apache.spark.sql.execution.SparkPlan$$anonfun$execute$1.apply(SparkPlan.scala:131) at org.apache.spark.sql.execution.SparkPlan$$anonfun$execute$1.apply(SparkPlan.scala:127) at org.apache.spark.sql.execution.SparkPlan$$anonfun$executeQuery$1.apply(SparkPlan.scala:155) at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:151) at org.apache.spark.sql.execution.SparkPlan.executeQuery(SparkPlan.scala:152) at org.apache.spark.sql.execution.SparkPlan.execute(SparkPlan.scala:127) at org.apache.spark.sql.execution.QueryExecution.toRdd$lzycompute(QueryExecution.scala:83) at org.apache.spark.sql.execution.QueryExecution.toRdd(QueryExecution.scala:81) at org.apache.spark.sql.DataFrameWriter$$anonfun$runCommand$1.apply(DataFrameWriter.scala:676) at org.apache.spark.sql.DataFrameWriter$$anonfun$runCommand$1.apply(DataFrameWriter.scala:676) at 
org.apache.spark.sql.execution.SQLExecution$$anonfun$withNewExecutionId$1.apply(SQLExecution.scala:80) at org.apache.spark.sql.execution.SQLExecution$.withSQLConfPropagated(SQLExecution.scala:127) at org.apache.spark.sql.execution.SQLExecution$.withNewExecutionId(SQLExecution.scala:75) at org.apache.spark.sql.DataFrameWriter.runCommand(DataFrameWriter.scala:676) at org.apache.spark.sql.DataFrameWriter.saveToV1Source(DataFrameWriter.scala:285) at org.apache.spark.sql.DataFrameWriter.save(DataFrameWriter.scala:271) at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method) at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62) at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43) at java.lang.reflect.Method.invoke(Method.java:498) at py4j.reflection.MethodInvoker.invoke(MethodInvoker.java:244) at py4j.reflection.ReflectionEngine.invoke(ReflectionEngine.java:357) at py4j.Gateway.invoke(Gateway.java:282) at py4j.commands.AbstractCommand.invokeMethod(AbstractCommand.java:132) at py4j.commands.CallCommand.execute(CallCommand.java:79) at py4j.GatewayConnection.run(GatewayConnection.java:238) at java.lang.Thread.run(Thread.java:748)
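The trace shows the failure inside `com.lucidworks.spark.SolrRelation$.addFieldsForInsert`, i.e. while spark-solr asks Solr's Schema API to add the DataFrame's fields to the collection before indexing, and the server replies "error processing commands". One way to see the server's actual complaint is to send the same kind of add-field command directly. The sketch below does that with Python's standard library; the field name and type are hypothetical, and the `http://localhost:8983/solr/country-test` URL is simply copied from the trace above:

```python
import json
import urllib.error
import urllib.request

# Hypothetical probe: issue one Schema API add-field command against the
# collection named in the trace and print Solr's full JSON response, which
# normally carries the per-command error detail behind
# "error processing commands".
url = "http://localhost:8983/solr/country-test/schema"
payload = {"add-field": {"name": "probe_field_s", "type": "string", "stored": True}}

req = urllib.request.Request(
    url,
    data=json.dumps(payload).encode("utf-8"),
    headers={"Content-Type": "application/json"},
)
try:
    with urllib.request.urlopen(req) as resp:
        print(resp.read().decode("utf-8"))
except urllib.error.HTTPError as e:
    # On failure, the response body still contains the error details.
    print(e.read().decode("utf-8"))
```

If this direct request fails too, the response body should say why Solr rejected the command; a non-managed (classic) schema or a field-type mismatch would be plausible causes, though without the server log that is only a guess.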