Closed annaf-data closed 2 days ago
Py4JJavaError Traceback (most recent call last) Cell In[19], line 1 ----> 1 sentences_df = add_sentence_column(data, "Decarbonisation_Barriers", DecarbonisationBarriersAll) 2 sentences_df.show(truncate=False)
Cell In[16], line 7, in add_sentence_column(dataframe, answers_column, labels) 1 def add_sentence_column(dataframe, answers_column,labels): 2 # Define the stages of the pipeline 3 document_assembler = DocumentAssembler() \ 4 .setInputCol(answers_column) \ 5 .setOutputCol("document") ----> 7 sentence_detector = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "en") \ 8 .setInputCols(["document"]) \ 9 .setOutputCol("sentences") 11 classifier = ClassifierDLModel.pretrained("classifierdl_bertwiki_finance", "en") \ 12 .setInputCols(["sentences"]) \ 13 .setOutputCol("label") 15 # Create the pipeline
File ~/cluster-env/clonedenv/lib/python3.11/site-packages/sparknlp/annotator/sentence/sentence_detector_dl.py:467, in SentenceDetectorDLModel.pretrained(name, lang, remote_loc) 449 """Downloads and loads a pretrained model. 450 451 Parameters (...) 464 The restored model 465 """ 466 from sparknlp.pretrained import ResourceDownloader --> 467 return ResourceDownloader.downloadModel(SentenceDetectorDLModel, name, lang, remote_loc)
File ~/cluster-env/clonedenv/lib/python3.11/site-packages/sparknlp/pretrained/resource_downloader.py:86, in ResourceDownloader.downloadModel(reader, name, language, remote_loc, j_dwn)
63 """Downloads and loads a model with the default downloader. Usually this method
64 does not need to be called directly, as it is called by the pretrained()
65 method of the annotator.
(...)
83 Loaded pretrained annotator/pipeline
84 """
85 print(name + " download started this may take some time.")
---> 86 file_size = _internal._GetResourceSize(name, language, remote_loc).apply()
87 if file_size == "-1":
88 print("Can not find the model to download please check the name!")
File ~/cluster-env/clonedenv/lib/python3.11/site-packages/sparknlp/internal/__init__.py:668, in _GetResourceSize.__init__(self, name, language, remote_loc) 667 def __init__(self, name, language, remote_loc): --> 668 super(_GetResourceSize, self).__init__( 669 "com.johnsnowlabs.nlp.pretrained.PythonResourceDownloader.getDownloadSize", 670 name, 671 language, 672 remote_loc, 673 )
File ~/cluster-env/clonedenv/lib/python3.11/site-packages/sparknlp/internal/extended_java_wrapper.py:27, in ExtendedJavaWrapper.__init__(self, java_obj, *args) 25 super(ExtendedJavaWrapper, self).__init__(java_obj) 26 self.sc = SparkContext._active_spark_context ---> 27 self._java_obj = self.new_java_obj(java_obj, *args) 28 self.java_obj = self._java_obj
File ~/cluster-env/clonedenv/lib/python3.11/site-packages/sparknlp/internal/extended_java_wrapper.py:37, in ExtendedJavaWrapper.new_java_obj(self, java_class, *args) 36 def new_java_obj(self, java_class, *args): ---> 37 return self._new_java_obj(java_class, *args)
File /opt/spark/python/lib/pyspark.zip/pyspark/ml/wrapper.py:86, in JavaWrapper._new_java_obj(java_class, *args) 84 java_obj = getattr(java_obj, name) 85 java_args = [_py2java(sc, arg) for arg in args] ---> 86 return java_obj(*java_args)
File ~/cluster-env/clonedenv/lib/python3.11/site-packages/py4j/java_gateway.py:1322, in JavaMember.__call__(self, *args) 1316 command = proto.CALL_COMMAND_NAME +\ 1317 self.command_header +\ 1318 args_command +\ 1319 proto.END_COMMAND_PART 1321 answer = self.gateway_client.send_command(command) -> 1322 return_value = get_return_value( 1323 answer, self.gateway_client, self.target_id, self.name) 1325 for temp_arg in temp_args: 1326 if hasattr(temp_arg, "_detach"):
File /opt/spark/python/lib/pyspark.zip/pyspark/errors/exceptions/captured.py:179, in capture_sql_exception.
File ~/cluster-env/clonedenv/lib/python3.11/site-packages/py4j/protocol.py:326, in get_return_value(answer, gateway_client, target_id, name) 324 value = OUTPUT_CONVERTER[type](answer[2:], gateway_client) 325 if answer[1] == REFERENCE_TYPE: --> 326 raise Py4JJavaError( 327 "An error occurred while calling {0}{1}{2}.\n". 328 format(target_id, ".", name), value) 329 else: 330 raise Py4JError( 331 "An error occurred while calling {0}{1}{2}. Trace:\n{3}\n". 332 format(target_id, ".", name, value))
Py4JJavaError: An error occurred while calling z:com.johnsnowlabs.nlp.pretrained.PythonResourceDownloader.getDownloadSize.
: java.lang.ExceptionInInitializerError
at com.johnsnowlabs.nlp.pretrained.PythonResourceDownloader$.$anonfun$getDownloadSize$1(ResourceDownloader.scala:781)
at scala.Option.getOrElse(Option.scala:189)
at com.johnsnowlabs.nlp.pretrained.PythonResourceDownloader$.getDownloadSize(ResourceDownloader.scala:781)
at com.johnsnowlabs.nlp.pretrained.PythonResourceDownloader.getDownloadSize(ResourceDownloader.scala)
at java.base/jdk.internal.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
at java.base/jdk.internal.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
at java.base/jdk.internal.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
at java.base/java.lang.reflect.Method.invoke(Method.java:566)
at py4j.reflection.MethodInvoker.invoke(MethodInvoker.java:244)
at py4j.reflection.ReflectionEngine.invoke(ReflectionEngine.java:374)
at py4j.Gateway.invoke(Gateway.java:282)
at py4j.commands.AbstractCommand.invokeMethod(AbstractCommand.java:132)
at py4j.commands.CallCommand.execute(CallCommand.java:79)
at py4j.GatewayConnection.run(GatewayConnection.java:238)
at java.base/java.lang.Thread.run(Thread.java:829)
Caused by: Operation failed: "Bad Request", 400, HEAD, http://onelake.dfs.fabric.microsoft.com/xxxxxxxxxxxxxxxxxxxxxx/xxxxxxxxxxxxxxxxxxxxxxxxxx/cache_pretrained?upn=false&action=getStatus&timeout=90
at org.apache.hadoop.fs.azurebfs.services.AbfsRestOperation.completeExecute(AbfsRestOperation.java:231)
at org.apache.hadoop.fs.azurebfs.services.AbfsRestOperation.lambda$execute$0(AbfsRestOperation.java:191)
at org.apache.hadoop.fs.statistics.impl.IOStatisticsBinding.trackDurationOfInvocation(IOStatisticsBinding.java:464)
at org.apache.hadoop.fs.azurebfs.services.AbfsRestOperation.execute(AbfsRestOperation.java:189)
at org.apache.hadoop.fs.azurebfs.services.AbfsClient.getPathStatus(AbfsClient.java:690)
at org.apache.hadoop.fs.azurebfs.AzureBlobFileSystemStore.getFileStatus(AzureBlobFileSystemStore.java:1067)
at org.apache.hadoop.fs.azurebfs.AzureBlobFileSystem.getFileStatus(AzureBlobFileSystem.java:650)
at org.apache.hadoop.fs.azurebfs.AzureBlobFileSystem.getFileStatus(AzureBlobFileSystem.java:640)
at org.apache.hadoop.fs.FileSystem.exists(FileSystem.java:1759)
at org.apache.hadoop.fs.azurebfs.AzureBlobFileSystem.exists(AzureBlobFileSystem.java:1236)
at com.johnsnowlabs.nlp.pretrained.S3ResourceDownloader.
I think I resolved this issue by adding the spark.jsl.settings.pretrained.cache_folder property in the Spark properties section of my environment and setting it to a relative folder location in my repository (I created the folder first).
I am now facing a different issue that is not related to this problem, so I might close this one and open another one :) Once I get it all working, I might write up instructions on how to set up Spark NLP on Microsoft Fabric lol
Is there an existing issue for this?
Who can help?
No response
What are you working on?
I am working on a sentence detector and classifier - an internal project for the council to analyse text responses
Current Behavior
Each time I end up with: Py4JJavaError: An error occurred while calling z:com.johnsnowlabs.nlp.pretrained.PythonResourceDownloader.getDownloadSize. : java.lang.ExceptionInInitializerError
Expected Behavior
The add_sentence_column function should run without raising an error
Steps To Reproduce
Version A
Version B
Spark NLP version and Apache Spark
Spark NLP 5.5.1 on Microsoft Fabric - Spark 3.5
Type of Spark Application
No response
Java Version
Java: 11
Java Home Directory
not sure as it is Microsoft Fabric
Setup and installation
Environment library deployment
Operating System and Version
No response
Link to your project (if available)
No response
Additional Information
No response