maxpumperla / elephas

Distributed Deep learning with Keras & Spark
http://maxpumperla.com/elephas/
MIT License

Py4JJavaError: An error occurred while calling o258.fit. : java.lang.IllegalArgumentException #86

Closed dbl001 closed 6 years ago

dbl001 commented 6 years ago

I'm getting the error (above) running Spark_ML_Pipeline.ipynb from JupyterLab on OS X 10.11.6. I'm running Spark v2.3.0-SNAPSHOT with Anaconda Python. The error happens with both Python 2 (2.7.13, shown below) and Python 3.6.

Any ideas?


 $ python --version
Python 2.7.13 :: Anaconda custom (x86_64)

SparkContext

Spark UI

Version
v2.3.0-SNAPSHOT
Master
local[*]
AppName
myAppName

Py4JJavaError                             Traceback (most recent call last)
<ipython-input-20-a56786b3aef1> in <module>()
      2 
      3 string_indexer = StringIndexer(inputCol="category", outputCol="index_category")
----> 4 fitted_indexer = string_indexer.fit(train_df)
      5 indexed_df = fitted_indexer.transform(train_df)

/Users/davidlaxer/spark/python/pyspark/ml/base.pyc in fit(self, dataset, params)
    130                 return self.copy(params)._fit(dataset)
    131             else:
--> 132                 return self._fit(dataset)
    133         else:
    134             raise ValueError("Params must be either a param map or a list/tuple of param maps, "

/Users/davidlaxer/spark/python/pyspark/ml/wrapper.pyc in _fit(self, dataset)
    286 
    287     def _fit(self, dataset):
--> 288         java_model = self._fit_java(dataset)
    289         model = self._create_model(java_model)
    290         return self._copyValues(model)

/Users/davidlaxer/spark/python/pyspark/ml/wrapper.pyc in _fit_java(self, dataset)
    283         """
    284         self._transfer_params_to_java()
--> 285         return self._java_obj.fit(dataset._jdf)
    286 
    287     def _fit(self, dataset):

/Users/davidlaxer/spark/python/lib/py4j-0.10.6-src.zip/py4j/java_gateway.py in __call__(self, *args)
   1158         answer = self.gateway_client.send_command(command)
   1159         return_value = get_return_value(
-> 1160             answer, self.gateway_client, self.target_id, self.name)
   1161 
   1162         for temp_arg in temp_args:

/Users/davidlaxer/spark/python/pyspark/sql/utils.pyc in deco(*a, **kw)
     61     def deco(*a, **kw):
     62         try:
---> 63             return f(*a, **kw)
     64         except py4j.protocol.Py4JJavaError as e:
     65             s = e.java_exception.toString()

/Users/davidlaxer/spark/python/lib/py4j-0.10.6-src.zip/py4j/protocol.py in get_return_value(answer, gateway_client, target_id, name)
    318                 raise Py4JJavaError(
    319                     "An error occurred while calling {0}{1}{2}.\n".
--> 320                     format(target_id, ".", name), value)
    321             else:
    322                 raise Py4JError(

Py4JJavaError: An error occurred while calling o258.fit.
: java.lang.IllegalArgumentException
    at org.apache.xbean.asm5.ClassReader.<init>(Unknown Source)
    at org.apache.xbean.asm5.ClassReader.<init>(Unknown Source)
    at org.apache.xbean.asm5.ClassReader.<init>(Unknown Source)
    at org.apache.spark.util.ClosureCleaner$.getClassReader(ClosureCleaner.scala:46)
    at org.apache.spark.util.FieldAccessFinder$$anon$3$$anonfun$visitMethodInsn$2.apply(ClosureCleaner.scala:449)
    at org.apache.spark.util.FieldAccessFinder$$anon$3$$anonfun$visitMethodInsn$2.apply(ClosureCleaner.scala:432)
    at scala.collection.TraversableLike$WithFilter$$anonfun$foreach$1.apply(TraversableLike.scala:733)
    at scala.collection.mutable.HashMap$$anon$1$$anonfun$foreach$2.apply(HashMap.scala:103)
    at scala.collection.mutable.HashMap$$anon$1$$anonfun$foreach$2.apply(HashMap.scala:103)
    at scala.collection.mutable.HashTable$class.foreachEntry(HashTable.scala:230)
    at scala.collection.mutable.HashMap.foreachEntry(HashMap.scala:40)
    at scala.collection.mutable.HashMap$$anon$1.foreach(HashMap.scala:103)
    at scala.collection.TraversableLike$WithFilter.foreach(TraversableLike.scala:732)
    at org.apache.spark.util.FieldAccessFinder$$anon$3.visitMethodInsn(ClosureCleaner.scala:432)
    at org.apache.xbean.asm5.ClassReader.a(Unknown Source)
    at org.apache.xbean.asm5.ClassReader.b(Unknown Source)
    at org.apache.xbean.asm5.ClassReader.accept(Unknown Source)
    at org.apache.xbean.asm5.ClassReader.accept(Unknown Source)
    at org.apache.spark.util.ClosureCleaner$$anonfun$org$apache$spark$util$ClosureCleaner$$clean$14.apply(ClosureCleaner.scala:262)
    at org.apache.spark.util.ClosureCleaner$$anonfun$org$apache$spark$util$ClosureCleaner$$clean$14.apply(ClosureCleaner.scala:261)
    at scala.collection.immutable.List.foreach(List.scala:381)
    at org.apache.spark.util.ClosureCleaner$.org$apache$spark$util$ClosureCleaner$$clean(ClosureCleaner.scala:261)
    at org.apache.spark.util.ClosureCleaner$.clean(ClosureCleaner.scala:159)
    at org.apache.spark.SparkContext.clean(SparkContext.scala:2292)
    at org.apache.spark.SparkContext.runJob(SparkContext.scala:2066)
    at org.apache.spark.SparkContext.runJob(SparkContext.scala:2092)
    at org.apache.spark.rdd.RDD$$anonfun$collect$1.apply(RDD.scala:937)
    at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:151)
    at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:112)
    at org.apache.spark.rdd.RDD.withScope(RDD.scala:363)
    at org.apache.spark.rdd.RDD.collect(RDD.scala:936)
    at org.apache.spark.rdd.PairRDDFunctions$$anonfun$countByKey$1.apply(PairRDDFunctions.scala:370)
    at org.apache.spark.rdd.PairRDDFunctions$$anonfun$countByKey$1.apply(PairRDDFunctions.scala:370)
    at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:151)
    at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:112)
    at org.apache.spark.rdd.RDD.withScope(RDD.scala:363)
    at org.apache.spark.rdd.PairRDDFunctions.countByKey(PairRDDFunctions.scala:369)
    at org.apache.spark.rdd.RDD$$anonfun$countByValue$1.apply(RDD.scala:1206)
    at org.apache.spark.rdd.RDD$$anonfun$countByValue$1.apply(RDD.scala:1206)
    at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:151)
    at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:112)
    at org.apache.spark.rdd.RDD.withScope(RDD.scala:363)
    at org.apache.spark.rdd.RDD.countByValue(RDD.scala:1205)
    at org.apache.spark.ml.feature.StringIndexer.fit(StringIndexer.scala:140)
    at java.base/jdk.internal.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
    at java.base/jdk.internal.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
    at java.base/jdk.internal.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
    at java.base/java.lang.reflect.Method.invoke(Method.java:564)
    at py4j.reflection.MethodInvoker.invoke(MethodInvoker.java:244)
    at py4j.reflection.ReflectionEngine.invoke(ReflectionEngine.java:357)
    at py4j.Gateway.invoke(Gateway.java:282)
    at py4j.commands.AbstractCommand.invokeMethod(AbstractCommand.java:132)
    at py4j.commands.CallCommand.execute(CallCommand.java:79)
    at py4j.GatewayConnection.run(GatewayConnection.java:214)
    at java.base/java.lang.Thread.run(Thread.java:844)

(Three screenshots of the notebook session from 2018-04-25 attached.)

dbl001 commented 6 years ago

Java:

$ java --version
java 9.0.4
Java(TM) SE Runtime Environment (build 9.0.4+11)
Java HotSpot(TM) 64-Bit Server VM (build 9.0.4+11, mixed mode)

The files are in HDFS:

David-Laxers-MacBook-Pro:hadoop hduser$ bin/hadoop fs -ls /user/davidlaxer
WARNING: An illegal reflective access operation has occurred
WARNING: Illegal reflective access by org.apache.hadoop.security.authentication.util.KerberosUtil (file:/usr/local/hadoop/share/hadoop/common/lib/hadoop-auth-2.6.4.jar) to method sun.security.krb5.Config.getInstance()
WARNING: Please consider reporting this to the maintainers of org.apache.hadoop.security.authentication.util.KerberosUtil
WARNING: Use --illegal-access=warn to enable warnings of further illegal reflective access operations
WARNING: All illegal access operations will be denied in a future release
Found 2 items
-rw-r--r--   1 hduser supergroup   27912710 2018-04-25 12:14 /user/davidlaxer/test.csv
-rw-r--r--   1 hduser supergroup   12433387 2018-04-25 12:14 /user/davidlaxer/train.csv
maxpumperla commented 6 years ago

@dbl001 I have to guess here. It seems this comes down to this line:

at org.apache.spark.rdd.RDD.countByValue(RDD.scala:1205)

It might be worth upgrading Spark or downgrading to Java 8 (PySpark is always a little brittle); see here: https://stackoverflow.com/questions/48603071/illegalargumentexception-with-spark-collect-on-jupyter?noredirect=1&lq=1&utm_medium=organic&utm_source=google_rich_qa&utm_campaign=google_rich_qa

In any case, I don't think this is really an elephas issue, but I'll leave this open anyway :)
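For anyone hitting the same ASM ClassReader IllegalArgumentException, here is a minimal sketch of pinning PySpark to a Java 8 installation before the SparkContext is created. The JDK path is an assumption for a typical macOS install; adjust it to the local machine.

import os

# JAVA_HOME must be set before the JVM is launched, i.e. before the
# SparkContext is constructed; spark-submit picks it up from the environment.
os.environ["JAVA_HOME"] = "/Library/Java/JavaVirtualMachines/jdk1.8.0_171.jdk/Contents/Home"

from pyspark import SparkConf, SparkContext

conf = SparkConf().setMaster("local[*]").setAppName("myAppName")
sc = SparkContext(conf=conf)
print(sc.version)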

dbl001 commented 6 years ago

Switching from Java 1.9 to Java 1.8 solved the issue with the StringIndexer. Now I'm getting issues with the scaler stage. Any ideas?

IllegalArgumentException: u'requirement failed: Column features must be of type org.apache.spark.ml.linalg.VectorUDT@3bfc3ba7 but was actually org.apache.spark.mllib.linalg.VectorUDT@f71b0bce.'

and also:

ImportError: cannot import name keyword_only
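A hedged workaround sketch for the keyword_only import error, assuming it comes from the decorator having moved between pyspark.ml.util (Spark 1.x) and the top-level pyspark package (Spark 2.x); a fallback import covers both locations:

# Try the newer location first, fall back to the older one.
try:
    from pyspark import keyword_only
except ImportError:
    from pyspark.ml.util import keyword_only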


dbl001 commented 6 years ago

This change corrects the IllegalArgumentException (the fix is to import Vectors from pyspark.ml.linalg rather than pyspark.mllib.linalg):

from pyspark.sql import SQLContext
# from pyspark.mllib.linalg import Vectors   # old import: produces the mllib VectorUDT
from pyspark.ml.linalg import Vectors, VectorUDT
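A minimal sketch of the corrected import in use, assuming Spark 2.x; the toy data and column names ("features", "category") are placeholders mirroring the notebook:

from pyspark.sql import SparkSession
from pyspark.ml.linalg import Vectors            # ml, not mllib
from pyspark.ml.feature import StringIndexer, StandardScaler

spark = SparkSession.builder.master("local[*]").appName("vector-udt-check").getOrCreate()

# Vectors.dense from pyspark.ml.linalg produces the new
# org.apache.spark.ml.linalg.VectorUDT that the pyspark.ml transformers expect.
train_df = spark.createDataFrame(
    [(Vectors.dense([0.0, 1.1, 0.1]), "a"),
     (Vectors.dense([2.0, 1.0, -1.0]), "b")],
    ["features", "category"])

indexed_df = StringIndexer(inputCol="category", outputCol="index_category").fit(train_df).transform(train_df)
scaled_df = StandardScaler(inputCol="features", outputCol="scaled_features").fit(indexed_df).transform(indexed_df)
scaled_df.show()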


dbl001 commented 6 years ago

I downgraded Spark from version 2.4 to Spark 1.6.3 (this solved many problems with Elephas). I'm running JupyterLab and Spark on an AWS EC2 instance.

I’m getting this connection error:

File "/home/ubuntu/anaconda/envs/spacy/lib/python3.6/urllib/request.py", line 1320, in do_open
    raise URLError(err)
urllib.error.URLError: <urlopen error [Errno 111] Connection refused>

Example Spark programs run fine. Hadoop 2.6.2 seems fine. The master port (5000) is unblocked. Any ideas which port(s) might be blocked, or what else might be causing this exception?
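One way to narrow this down is to check whether the Flask parameter-server port is reachable from a worker node. A hedged sketch; the host name and the default port 5000 are assumptions:

import socket

try:
    # Replace the host with the driver's private IP as seen from the workers.
    with socket.create_connection(("driver-private-ip", 5000), timeout=5):
        print("port 5000 is reachable")
except OSError as err:
    print("cannot reach port 5000:", err)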

from pyspark.mllib.evaluation import MulticlassMetrics

fitted_pipeline = pipeline.fit(train_df) # Fit model to data

prediction = fitted_pipeline.transform(train_df) # Evaluate on train data.
# prediction = fitted_pipeline.transform(test_df) # <-- The same code evaluates test data.
pnl = prediction.select("index_category", "prediction")
pnl.show(100)

prediction_and_label = pnl.map(lambda row: (row.index_category, row.prediction))
metrics = MulticlassMetrics(prediction_and_label)
print(metrics.precision())
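(Aside: the pnl.map call above works on Spark 1.x DataFrames; on Spark 2.x the equivalent, an assumption about the 2.x API rather than part of the notebook, goes through the underlying RDD:)

prediction_and_label = pnl.rdd.map(lambda row: (row.index_category, row.prediction))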

Process Process-5:
Traceback (most recent call last):
  File "/home/ubuntu/anaconda/envs/spacy/lib/python3.6/multiprocessing/process.py", line 258, in _bootstrap
    self.run()
  File "/home/ubuntu/anaconda/envs/spacy/lib/python3.6/multiprocessing/process.py", line 93, in run
    self._target(*self._args, **self._kwargs)
  File "/home/ubuntu/anaconda/envs/spacy/lib/python3.6/site-packages/elephas-0.3-py3.6.egg/elephas/spark_model.py", line 171, in start_service
    threaded=True, use_reloader=False)
  File "/home/ubuntu/anaconda/envs/spacy/lib/python3.6/site-packages/Flask-1.0.2-py3.6.egg/flask/app.py", line 938, in run
    cli.show_server_banner(self.env, self.debug, self.name, False)
  File "/home/ubuntu/anaconda/envs/spacy/lib/python3.6/site-packages/Flask-1.0.2-py3.6.egg/flask/cli.py", line 629, in show_server_banner
    click.echo(message)
  File "/home/ubuntu/anaconda/envs/spacy/lib/python3.6/site-packages/click-6.7-py3.6.egg/click/utils.py", line 259, in echo
    file.write(message)
io.UnsupportedOperation: not writable
---------------------------------------------------------------------------
Py4JJavaError                             Traceback (most recent call last)
<ipython-input-18-63c8e305d765> in <module>()
      1 from pyspark.mllib.evaluation import MulticlassMetrics
      2 
----> 3 fitted_pipeline = pipeline.fit(train_df) # Fit model to data
      4 
      5 prediction = fitted_pipeline.transform(train_df) # Evaluate on train data.

~/spark/python/pyspark/ml/pipeline.py in fit(self, dataset, params)
     67                 return self.copy(params)._fit(dataset)
     68             else:
---> 69                 return self._fit(dataset)
     70         else:
     71             raise ValueError("Params must be either a param map or a list/tuple of param maps, "

~/spark/python/pyspark/ml/pipeline.py in _fit(self, dataset)
    211                     dataset = stage.transform(dataset)
    212                 else:  # must be an Estimator
--> 213                     model = stage.fit(dataset)
    214                     transformers.append(model)
    215                     if i < indexOfLastEstimator:

~/spark/python/pyspark/ml/pipeline.py in fit(self, dataset, params)
     67                 return self.copy(params)._fit(dataset)
     68             else:
---> 69                 return self._fit(dataset)
     70         else:
     71             raise ValueError("Params must be either a param map or a list/tuple of param maps, "

~/anaconda/envs/spacy/lib/python3.6/site-packages/elephas-0.3-py3.6.egg/elephas/ml_model.py in _fit(self, df)
     57                                  num_workers=self.get_num_workers())
     58         spark_model.train(simple_rdd, nb_epoch=self.get_nb_epoch(), batch_size=self.get_batch_size(),
---> 59                           verbose=self.get_verbosity(), validation_split=self.get_validation_split())
     60 
     61         model_weights = spark_model.master_network.get_weights()

~/anaconda/envs/spacy/lib/python3.6/site-packages/elephas-0.3-py3.6.egg/elephas/spark_model.py in train(self, rdd, nb_epoch, batch_size, verbose, validation_split)
    192 
    193         if self.mode in ['asynchronous', 'synchronous', 'hogwild']:
--> 194             self._train(rdd, nb_epoch, batch_size, verbose, validation_split, master_url)
    195         else:
    196             print("""Choose from one of the modes: asynchronous, synchronous or hogwild""")

~/anaconda/envs/spacy/lib/python3.6/site-packages/elephas-0.3-py3.6.egg/elephas/spark_model.py in _train(self, rdd, nb_epoch, batch_size, verbose, validation_split, master_url)
    212                 self.master_optimizer, self.master_loss, self.master_metrics, self.custom_objects
    213             )
--> 214             rdd.mapPartitions(worker.train).collect()
    215             new_parameters = get_server_weights(master_url)
    216         elif self.mode == 'synchronous':

~/spark/python/pyspark/rdd.py in collect(self)
    769         """
    770         with SCCallSiteSync(self.context) as css:
--> 771             port = self.ctx._jvm.PythonRDD.collectAndServe(self._jrdd.rdd())
    772         return list(_load_from_socket(port, self._jrdd_deserializer))
    773 

~/spark/python/lib/py4j-0.9-src.zip/py4j/java_gateway.py in __call__(self, *args)
    811         answer = self.gateway_client.send_command(command)
    812         return_value = get_return_value(
--> 813             answer, self.gateway_client, self.target_id, self.name)
    814 
    815         for temp_arg in temp_args:

~/spark/python/pyspark/sql/utils.py in deco(*a, **kw)
     43     def deco(*a, **kw):
     44         try:
---> 45             return f(*a, **kw)
     46         except py4j.protocol.Py4JJavaError as e:
     47             s = e.java_exception.toString()

~/spark/python/lib/py4j-0.9-src.zip/py4j/protocol.py in get_return_value(answer, gateway_client, target_id, name)
    306                 raise Py4JJavaError(
    307                     "An error occurred while calling {0}{1}{2}.\n".
--> 308                     format(target_id, ".", name), value)
    309             else:
    310                 raise Py4JError(

Py4JJavaError: An error occurred while calling z:org.apache.spark.api.python.PythonRDD.collectAndServe.
: org.apache.spark.SparkException: Job aborted due to stage failure: Task 0 in stage 43.0 failed 1 times, most recent failure: Lost task 0.0 in stage 43.0 (TID 268, localhost): org.apache.spark.api.python.PythonException: Traceback (most recent call last):
  File "/home/ubuntu/anaconda/envs/spacy/lib/python3.6/urllib/request.py", line 1318, in do_open
    encode_chunked=req.has_header('Transfer-encoding'))
  File "/home/ubuntu/anaconda/envs/spacy/lib/python3.6/http/client.py", line 1239, in request
    self._send_request(method, url, body, headers, encode_chunked)
  File "/home/ubuntu/anaconda/envs/spacy/lib/python3.6/http/client.py", line 1285, in _send_request
    self.endheaders(body, encode_chunked=encode_chunked)
  File "/home/ubuntu/anaconda/envs/spacy/lib/python3.6/http/client.py", line 1234, in endheaders
    self._send_output(message_body, encode_chunked=encode_chunked)
  File "/home/ubuntu/anaconda/envs/spacy/lib/python3.6/http/client.py", line 1026, in _send_output
    self.send(msg)
  File "/home/ubuntu/anaconda/envs/spacy/lib/python3.6/http/client.py", line 964, in send
    self.connect()
  File "/home/ubuntu/anaconda/envs/spacy/lib/python3.6/http/client.py", line 936, in connect
    (self.host,self.port), self.timeout, self.source_address)
  File "/home/ubuntu/anaconda/envs/spacy/lib/python3.6/socket.py", line 724, in create_connection
    if err is not None:
  File "/home/ubuntu/anaconda/envs/spacy/lib/python3.6/socket.py", line 713, in create_connection
    sock.bind(source_address)
ConnectionRefusedError: [Errno 111] Connection refused

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "/home/ubuntu/spark/python/lib/pyspark.zip/pyspark/worker.py", line 111, in main
    process()
  File "/home/ubuntu/spark/python/lib/pyspark.zip/pyspark/worker.py", line 106, in process
    serializer.dump_stream(func(split_index, iterator), outfile)
  File "/home/ubuntu/spark/python/lib/pyspark.zip/pyspark/serializers.py", line 263, in dump_stream
    vs = list(itertools.islice(iterator, batch))
  File "/home/ubuntu/anaconda/envs/spacy/lib/python3.6/site-packages/elephas-0.3-py3.6.egg/elephas/spark_model.py", line 304, in train
    weights_before_training = get_server_weights(self.master_url)
  File "/home/ubuntu/anaconda/envs/spacy/lib/python3.6/site-packages/elephas-0.3-py3.6.egg/elephas/spark_model.py", line 33, in get_server_weights
    ret = urllib2.urlopen(request).read()
  File "/home/ubuntu/anaconda/envs/spacy/lib/python3.6/urllib/request.py", line 223, in urlopen
    return opener.open(url, data, timeout)
  File "/home/ubuntu/anaconda/envs/spacy/lib/python3.6/urllib/request.py", line 526, in open
    response = self._open(req, data)
  File "/home/ubuntu/anaconda/envs/spacy/lib/python3.6/urllib/request.py", line 544, in _open
    '_open', req)
  File "/home/ubuntu/anaconda/envs/spacy/lib/python3.6/urllib/request.py", line 504, in _call_chain
    result = func(*args)
  File "/home/ubuntu/anaconda/envs/spacy/lib/python3.6/urllib/request.py", line 1346, in http_open
    return self.do_open(http.client.HTTPConnection, req)
  File "/home/ubuntu/anaconda/envs/spacy/lib/python3.6/urllib/request.py", line 1320, in do_open
    raise URLError(err)
urllib.error.URLError: <urlopen error [Errno 111] Connection refused>

    at org.apache.spark.api.python.PythonRunner$$anon$1.read(PythonRDD.scala:166)
    at org.apache.spark.api.python.PythonRunner$$anon$1.<init>(PythonRDD.scala:207)
    at org.apache.spark.api.python.PythonRunner.compute(PythonRDD.scala:125)
    at org.apache.spark.api.python.PythonRDD.compute(PythonRDD.scala:70)
    at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:306)
    at org.apache.spark.rdd.RDD.iterator(RDD.scala:270)
    at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:66)
    at org.apache.spark.scheduler.Task.run(Task.scala:89)
    at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:227)
    at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
    at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
    at java.lang.Thread.run(Thread.java:748)

Driver stacktrace:
    at org.apache.spark.scheduler.DAGScheduler.org$apache$spark$scheduler$DAGScheduler$$failJobAndIndependentStages(DAGScheduler.scala:1431)
    at org.apache.spark.scheduler.DAGScheduler$$anonfun$abortStage$1.apply(DAGScheduler.scala:1419)
    at org.apache.spark.scheduler.DAGScheduler$$anonfun$abortStage$1.apply(DAGScheduler.scala:1418)
    at scala.collection.mutable.ResizableArray$class.foreach(ResizableArray.scala:59)
    at scala.collection.mutable.ArrayBuffer.foreach(ArrayBuffer.scala:47)
    at org.apache.spark.scheduler.DAGScheduler.abortStage(DAGScheduler.scala:1418)
    at org.apache.spark.scheduler.DAGScheduler$$anonfun$handleTaskSetFailed$1.apply(DAGScheduler.scala:799)
    at org.apache.spark.scheduler.DAGScheduler$$anonfun$handleTaskSetFailed$1.apply(DAGScheduler.scala:799)
    at scala.Option.foreach(Option.scala:236)
    at org.apache.spark.scheduler.DAGScheduler.handleTaskSetFailed(DAGScheduler.scala:799)
    at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.doOnReceive(DAGScheduler.scala:1640)
    at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:1599)
    at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:1588)
    at org.apache.spark.util.EventLoop$$anon$1.run(EventLoop.scala:48)
    at org.apache.spark.scheduler.DAGScheduler.runJob(DAGScheduler.scala:620)
    at org.apache.spark.SparkContext.runJob(SparkContext.scala:1832)
    at org.apache.spark.SparkContext.runJob(SparkContext.scala:1845)
    at org.apache.spark.SparkContext.runJob(SparkContext.scala:1858)
    at org.apache.spark.SparkContext.runJob(SparkContext.scala:1929)
    at org.apache.spark.rdd.RDD$$anonfun$collect$1.apply(RDD.scala:927)
    at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:150)
    at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:111)
    at org.apache.spark.rdd.RDD.withScope(RDD.scala:316)
    at org.apache.spark.rdd.RDD.collect(RDD.scala:926)
    at org.apache.spark.api.python.PythonRDD$.collectAndServe(PythonRDD.scala:405)
    at org.apache.spark.api.python.PythonRDD.collectAndServe(PythonRDD.scala)
    at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
    at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
    at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
    at java.lang.reflect.Method.invoke(Method.java:498)
    at py4j.reflection.MethodInvoker.invoke(MethodInvoker.java:231)
    at py4j.reflection.ReflectionEngine.invoke(ReflectionEngine.java:381)
    at py4j.Gateway.invoke(Gateway.java:259)
    at py4j.commands.AbstractCommand.invokeMethod(AbstractCommand.java:133)
    at py4j.commands.CallCommand.execute(CallCommand.java:79)
    at py4j.GatewayConnection.run(GatewayConnection.java:209)
    at java.lang.Thread.run(Thread.java:748)
Caused by: org.apache.spark.api.python.PythonException: Traceback (most recent call last):
  File "/home/ubuntu/anaconda/envs/spacy/lib/python3.6/urllib/request.py", line 1318, in do_open
    encode_chunked=req.has_header('Transfer-encoding'))
  File "/home/ubuntu/anaconda/envs/spacy/lib/python3.6/http/client.py", line 1239, in request
    self._send_request(method, url, body, headers, encode_chunked)
  File "/home/ubuntu/anaconda/envs/spacy/lib/python3.6/http/client.py", line 1285, in _send_request
    self.endheaders(body, encode_chunked=encode_chunked)
  File "/home/ubuntu/anaconda/envs/spacy/lib/python3.6/http/client.py", line 1234, in endheaders
    self._send_output(message_body, encode_chunked=encode_chunked)
  File "/home/ubuntu/anaconda/envs/spacy/lib/python3.6/http/client.py", line 1026, in _send_output
    self.send(msg)
  File "/home/ubuntu/anaconda/envs/spacy/lib/python3.6/http/client.py", line 964, in send
    self.connect()
  File "/home/ubuntu/anaconda/envs/spacy/lib/python3.6/http/client.py", line 936, in connect
    (self.host,self.port), self.timeout, self.source_address)
  File "/home/ubuntu/anaconda/envs/spacy/lib/python3.6/socket.py", line 724, in create_connection
    if err is not None:
  File "/home/ubuntu/anaconda/envs/spacy/lib/python3.6/socket.py", line 713, in create_connection
    sock.bind(source_address)
ConnectionRefusedError: [Errno 111] Connection refused

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "/home/ubuntu/spark/python/lib/pyspark.zip/pyspark/worker.py", line 111, in main
    process()
  File "/home/ubuntu/spark/python/lib/pyspark.zip/pyspark/worker.py", line 106, in process
    serializer.dump_stream(func(split_index, iterator), outfile)
  File "/home/ubuntu/spark/python/lib/pyspark.zip/pyspark/serializers.py", line 263, in dump_stream
    vs = list(itertools.islice(iterator, batch))
  File "/home/ubuntu/anaconda/envs/spacy/lib/python3.6/site-packages/elephas-0.3-py3.6.egg/elephas/spark_model.py", line 304, in train
    weights_before_training = get_server_weights(self.master_url)
  File "/home/ubuntu/anaconda/envs/spacy/lib/python3.6/site-packages/elephas-0.3-py3.6.egg/elephas/spark_model.py", line 33, in get_server_weights
    ret = urllib2.urlopen(request).read()
  File "/home/ubuntu/anaconda/envs/spacy/lib/python3.6/urllib/request.py", line 223, in urlopen
    return opener.open(url, data, timeout)
  File "/home/ubuntu/anaconda/envs/spacy/lib/python3.6/urllib/request.py", line 526, in open
    response = self._open(req, data)
  File "/home/ubuntu/anaconda/envs/spacy/lib/python3.6/urllib/request.py", line 544, in _open
    '_open', req)
  File "/home/ubuntu/anaconda/envs/spacy/lib/python3.6/urllib/request.py", line 504, in _call_chain
    result = func(*args)
  File "/home/ubuntu/anaconda/envs/spacy/lib/python3.6/urllib/request.py", line 1346, in http_open
    return self.do_open(http.client.HTTPConnection, req)
  File "/home/ubuntu/anaconda/envs/spacy/lib/python3.6/urllib/request.py", line 1320, in do_open
    raise URLError(err)
urllib.error.URLError: <urlopen error [Errno 111] Connection refused>

    at org.apache.spark.api.python.PythonRunner$$anon$1.read(PythonRDD.scala:166)
    at org.apache.spark.api.python.PythonRunner$$anon$1.<init>(PythonRDD.scala:207)
    at org.apache.spark.api.python.PythonRunner.compute(PythonRDD.scala:125)
    at org.apache.spark.api.python.PythonRDD.compute(PythonRDD.scala:70)
    at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:306)
    at org.apache.spark.rdd.RDD.iterator(RDD.scala:270)
    at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:66)
    at org.apache.spark.scheduler.Task.run(Task.scala:89)
    at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:227)
    at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
    at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
    ... 1 more
dbl001 commented 6 years ago

I’ve made progress… Spark_ML_Pipeline.ipynb runs on OS X, but not on my EC2 instance running Ubuntu 14.04 LTS.

It appears that there are Flask dependencies in spark_model.py. I’ve opened ports 5000-5010 in my EC2 security group, but I’m still getting an error on port 5000.

ConnectionRefusedError: [Errno 111] Connection refused

What are the Flask requirements?


maxpumperla commented 6 years ago

@dbl001 Flask is used for the HTTP server, so you already have those requirements (otherwise you'd get a different error). I'm not sure exactly what happens there, but it seems the workers can't connect to the master's port 5000. Note that the async mode you're using circumvents Spark's built-in communication between nodes and is experimental at best. I'd go with synchronous mode on AWS.
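A minimal sketch of switching to synchronous mode, assuming the elephas 0.3 SparkModel signature visible in the tracebacks above (a mode attribute checked against 'asynchronous'/'synchronous'/'hogwild'); the Keras model, training arrays, and worker count are placeholders:

from elephas.spark_model import SparkModel
from elephas.utils.rdd_utils import to_simple_rdd

# sc, keras_model, x_train and y_train are assumed to exist already.
simple_rdd = to_simple_rdd(sc, x_train, y_train)
spark_model = SparkModel(sc, keras_model, mode='synchronous', num_workers=4)
spark_model.train(simple_rdd, nb_epoch=10, batch_size=32,
                  verbose=0, validation_split=0.1)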

dbl001 commented 6 years ago

Thanks! In ‘synchronous’ mode on AWS I get:

AttributeError: 'Sequential' object has no attribute 'constraints'

    for delta in deltas:
        constraints = self.master_network.constraints
        new_parameters = self.optimizer.get_updates(self.weights, constraints, delta)

(same as 'Model' object has no attribute 'constraints', #87)

AttributeError                            Traceback (most recent call last)
<ipython-input> in <module>()
      1 from pyspark.mllib.evaluation import MulticlassMetrics
      2 
----> 3 fitted_pipeline = pipeline.fit(train_df) # Fit model to data
      4 
      5 prediction = fitted_pipeline.transform(train_df) # Evaluate on train data.

~/spark/python/pyspark/ml/pipeline.py in fit(self, dataset, params)
     67                 return self.copy(params)._fit(dataset)
     68             else:
---> 69                 return self._fit(dataset)
     70         else:
     71             raise ValueError("Params must be either a param map or a list/tuple of param maps, "

~/spark/python/pyspark/ml/pipeline.py in _fit(self, dataset)
    211                     dataset = stage.transform(dataset)
    212                 else:  # must be an Estimator
--> 213                     model = stage.fit(dataset)
    214                     transformers.append(model)
    215                     if i < indexOfLastEstimator:

~/spark/python/pyspark/ml/pipeline.py in fit(self, dataset, params)
     67                 return self.copy(params)._fit(dataset)
     68             else:
---> 69                 return self._fit(dataset)
     70         else:
     71             raise ValueError("Params must be either a param map or a list/tuple of param maps, "

~/anaconda/envs/spacy/lib/python3.6/site-packages/elephas-0.3-py3.6.egg/elephas/ml_model.py in _fit(self, df)
     57                                  num_workers=self.get_num_workers())
     58         spark_model.train(simple_rdd, nb_epoch=self.get_nb_epoch(), batch_size=self.get_batch_size(),
---> 59                           verbose=self.get_verbosity(), validation_split=self.get_validation_split())
     60 
     61         model_weights = spark_model.master_network.get_weights()

~/anaconda/envs/spacy/lib/python3.6/site-packages/elephas-0.3-py3.6.egg/elephas/spark_model.py in train(self, rdd, nb_epoch, batch_size, verbose, validation_split)
    192 
    193         if self.mode in ['asynchronous', 'synchronous', 'hogwild']:
--> 194             self._train(rdd, nb_epoch, batch_size, verbose, validation_split, master_url)
    195         else:
    196             print("""Choose from one of the modes: asynchronous, synchronous or hogwild""")

~/anaconda/envs/spacy/lib/python3.6/site-packages/elephas-0.3-py3.6.egg/elephas/spark_model.py in _train(self, rdd, nb_epoch, batch_size, verbose, validation_split, master_url)
    224             new_parameters = self.master_network.get_weights()
    225             for delta in deltas:
--> 226                 constraints = self.master_network.constraints
    227                 new_parameters = self.optimizer.get_updates(self.weights, constraints, delta)
    228             self.master_network.set_weights(new_parameters)

AttributeError: 'Sequential' object has no attribute 'constraints'
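For what it's worth, the missing attribute looks like a Keras version issue: Keras 1.x models exposed a model-level constraints property that Keras 2.x removed, and the elephas 0.3 code at spark_model.py:226 still reads it. A quick hedged check of which situation the installed Keras is in:

from keras.models import Sequential

model = Sequential()
# True on Keras 1.x, where the model aggregates layer constraints;
# False on Keras 2.x, where constraints live on individual weights/layers.
print(hasattr(model, "constraints"))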
{"api_version":"1.0","publisher":{"api_key":"05dde50f1d1a384dd78767c55493e4bb","name":"GitHub"},"entity":{"external_key":"github/maxpumperla/elephas","title":"maxpumperla/elephas","subtitle":"GitHub repository","main_image_url":"https://cloud.githubusercontent.com/assets/143418/17495839/a5054eac-5d88-11e6-95fc-7290892c7bb5.png" class="">https://cloud.githubusercontent.com/assets/143418/17495839/a5054eac-5d88-11e6-95fc-7290892c7bb5.png","avatar_image_url":"https://cloud.githubusercontent.com/assets/143418/15842166/7c72db34-2c0b-11e6-9aed-b52498112777.png" class="">https://cloud.githubusercontent.com/assets/143418/15842166/7c72db34-2c0b-11e6-9aed-b52498112777.png","action":{"name":"Open in GitHub","url":"https://github.com/maxpumperla/elephas" class="">https://github.com/maxpumperla/elephas</a>"}},"updates":{"snippets":[{"icon":"PERSON","message":"@maxpumperla in #86: @dbl001 flask is used for the http server, so you have those requirements already (otherwise you'd get a different error). Not sure what exactly happens there, but it seems the workers can't connect to the master's port 5000. Note that the `async` mode you're using circumvents Spark's built-in communication between nodes and is experimental at best. I'd go with synchronous mode on AWS."}],"action":{"name":"View Issue","url":"https://github.com/maxpumperla/elephas/issues/86#issuecomment-387970140" class="">https://github.com/maxpumperla/elephas/issues/86#issuecomment-387970140</a>"}}} {"@type":"MessageCard","@context":"http://schema.org/extensions" class="">http://schema.org/extensions","hideOriginalBody":"false","originator":"37567f93-e2a7-4e2a-ad37-a9160fc62647","title":"Re: [maxpumperla/elephas] Py4JJavaError: An error occurred while calling o258.fit. : java.lang.IllegalArgumentException (#86)","sections":[{"text":"","activityTitle":"**Max Pumperla**","activityImage":"https://avatars3.githubusercontent.com/u/3462566?s=160\u0026v=4" class="">https://avatars3.githubusercontent.com/u/3462566?s=160\u0026v=4","activitySubtitle":"@maxpumperla","facts":[]}],"potentialAction":[{"name":"Add a comment","@type":"ActionCard","inputs":[{"isMultiLine":true,"@type":"TextInput","id":"IssueComment","isRequired":false}],"actions":[{"name":"Comment","@type":"HttpPOST","target":"https://api.github.com" class="">https://api.github.com","body":"{\"commandName\":\"IssueComment\",\"repositoryFullName\":\"maxpumperla/elephas\",\"issueId\":86,\"IssueComment\":\"{{IssueComment.value}}\"}"}]},{"name":"Close issue","@type":"HttpPOST","target":"https://api.github.com" class="">https://api.github.com","body":"{\"commandName\":\"IssueClose\",\"repositoryFullName\":\"maxpumperla/elephas\",\"issueId\":86}"},{"targets":[{"os":"default","uri":"https://github.com/maxpumperla/elephas/issues/86#issuecomment-387970140" class="">https://github.com/maxpumperla/elephas/issues/86#issuecomment-387970140</a>"}],"@type":"OpenUri","name":"View on GitHub"},{"name":"Unsubscribe","@type":"HttpPOST","target":"https://api.github.com" class="">https://api.github.com","body":"{\"commandName\":\"MuteNotification\",\"threadId\":328223287}"}],"themeColor":"26292E"}
maxpumperla commented 6 years ago

@dbl001 I'm closing this one for now; everything we discussed is either covered in other open issues or needs to be addressed elsewhere. Thanks.