Status: Closed — miguelgfierro closed this issue 3 years ago
conda activate reco_base
pytest tests/unit -m "not notebooks and not spark and not gpu" --durations 0 --disable-warnings
============================================================ test session starts =============================================================
platform linux -- Python 3.6.11, pytest-6.1.1, py-1.9.0, pluggy-0.13.1
rootdir: /home/recocat/notebooks/repos/recommenders
collected 218 items / 92 deselected / 126 selected
tests/unit/test_aks_utils.py ... [ 2%]
tests/unit/test_cornac_utils.py .. [ 3%]
tests/unit/test_covid_utils.py ...ss [ 7%]
tests/unit/test_dataset.py .. [ 9%]
tests/unit/test_general_utils.py .. [ 11%]
tests/unit/test_geoimc.py ............ [ 20%]
tests/unit/test_lightfm_utils.py .... [ 23%]
tests/unit/test_ncf_dataset.py ... [ 26%]
tests/unit/test_nni_utils.py ........... [ 34%]
tests/unit/test_pandas_df_utils.py ...... [ 39%]
tests/unit/test_plot.py . [ 40%]
tests/unit/test_python_evaluation.py .............. [ 51%]
tests/unit/test_python_splitter.py ....... [ 57%]
tests/unit/test_python_utils.py ...... [ 61%]
tests/unit/test_sar_singlenode.py ................... [ 76%]
tests/unit/test_sparse.py .. [ 78%]
tests/unit/test_surprise_utils.py .. [ 80%]
tests/unit/test_sweep.py . [ 80%]
tests/unit/test_tfidf_utils.py ........ [ 87%]
tests/unit/test_timer.py ..... [ 91%]
tests/unit/test_vowpal_wabbit.py ...... [ 96%]
tests/unit/test_wikidata.py ..... [100%]
============================================================= slowest durations ==============================================================
2.00s call tests/unit/test_timer.py::test_timer
1.26s call tests/unit/test_wikidata.py::test_read_linked_entities
0.95s call tests/unit/test_geoimc.py::test_imcproblem[dataPtr0-3]
0.53s setup tests/unit/test_ncf_dataset.py::test_data_preprocessing
0.50s call tests/unit/test_wikidata.py::test_find_wikidata_id
0.43s call tests/unit/test_wikidata.py::test_query_entity_links
0.31s setup tests/unit/test_sar_singlenode.py::test_sar_item_similarity[1-cooccurrence-count]
0.29s call tests/unit/test_ncf_dataset.py::test_data_preprocessing
0.25s call tests/unit/test_cornac_utils.py::test_recommend_k_items
0.18s call tests/unit/test_ncf_dataset.py::test_train_loader
0.16s call tests/unit/test_tfidf_utils.py::test_fit
0.15s call tests/unit/test_tfidf_utils.py::test_tokenize_text
0.15s call tests/unit/test_dataset.py::test_maybe_download
0.14s call tests/unit/test_ncf_dataset.py::test_test_loader
0.13s setup tests/unit/test_tfidf_utils.py::test_get_tokens
0.12s call tests/unit/test_python_evaluation.py::test_python_precision
0.10s call tests/unit/test_nni_utils.py::test_check_experiment_status_running
0.10s call tests/unit/test_nni_utils.py::test_check_metrics_written_timeout
0.10s call tests/unit/test_nni_utils.py::test_check_experiment_status_no_more_trial
0.10s call tests/unit/test_nni_utils.py::test_check_stopped_timeout
0.08s call tests/unit/test_cornac_utils.py::test_predict
0.08s call tests/unit/test_sar_singlenode.py::test_sar_item_similarity[3-lift-lift]
0.08s call tests/unit/test_python_evaluation.py::test_python_map_at_k
0.08s call tests/unit/test_python_evaluation.py::test_python_ndcg_at_k
0.08s call tests/unit/test_python_splitter.py::test_chrono_splitter
0.07s call tests/unit/test_sar_singlenode.py::test_sar_item_similarity[3-jaccard-jac]
0.06s call tests/unit/test_python_evaluation.py::test_python_recall
0.06s call tests/unit/test_python_splitter.py::test_stratified_splitter
0.06s call tests/unit/test_sar_singlenode.py::test_sar_item_similarity[1-jaccard-jac]
0.06s call tests/unit/test_sar_singlenode.py::test_sar_item_similarity[1-lift-lift]
0.06s call tests/unit/test_plot.py::test_line_graph
0.05s call tests/unit/test_sar_singlenode.py::test_recommend_k_items[3-cooccurrence-count]
0.05s call tests/unit/test_sar_singlenode.py::test_recommend_k_items[3-lift-lift]
0.05s call tests/unit/test_sar_singlenode.py::test_sar_item_similarity[3-cooccurrence-count]
0.05s call tests/unit/test_sar_singlenode.py::test_recommend_k_items[3-jaccard-jac]
0.05s call tests/unit/test_sar_singlenode.py::test_sar_item_similarity[1-cooccurrence-count]
0.04s call tests/unit/test_sar_singlenode.py::test_user_affinity
0.04s call tests/unit/test_sar_singlenode.py::test_get_item_based_topk
0.03s call tests/unit/test_pandas_df_utils.py::test_user_item_pairs
0.03s setup tests/unit/test_lightfm_utils.py::test_fitting
0.03s call tests/unit/test_tfidf_utils.py::test_get_top_k_recommendations
0.03s call tests/unit/test_geoimc.py::test_imcproblem[dataPtr1-3]
0.02s call tests/unit/test_python_evaluation.py::test_merge_ranking
0.02s call tests/unit/test_sar_singlenode.py::test_get_normalized_scores
0.02s call tests/unit/test_surprise_utils.py::test_recommend_k_items
0.02s call tests/unit/test_python_splitter.py::test_random_splitter
0.02s call tests/unit/test_geoimc.py::test_reduce_dims
0.01s call tests/unit/test_surprise_utils.py::test_predict
0.01s call tests/unit/test_sar_singlenode.py::test_predict[lift-True]
0.01s call tests/unit/test_sar_singlenode.py::test_fit[lift-True]
0.01s call tests/unit/test_pandas_df_utils.py::test_csv_to_libffm
0.01s call tests/unit/test_python_splitter.py::test_min_rating_filter
0.01s call tests/unit/test_sparse.py::test_sparse_to_df
0.01s call tests/unit/test_sar_singlenode.py::test_predict_all_items
0.01s call tests/unit/test_python_evaluation.py::test_python_auc
0.01s call tests/unit/test_sar_singlenode.py::test_get_popularity_based_topk
0.01s setup tests/unit/test_sparse.py::test_df_to_sparse
0.01s call tests/unit/test_sar_singlenode.py::test_predict[jaccard-False]
0.01s call tests/unit/test_covid_utils.py::test_remove_duplicates
0.01s call tests/unit/test_python_evaluation.py::test_python_logloss
0.01s setup tests/unit/test_python_splitter.py::test_random_splitter
0.01s call tests/unit/test_sar_singlenode.py::test_fit[jaccard-False]
0.01s call tests/unit/test_covid_utils.py::test_clean_dataframe
0.01s call tests/unit/test_vowpal_wabbit.py::test_fit_and_predict
0.01s call tests/unit/test_python_evaluation.py::test_python_exp_var
0.01s call tests/unit/test_python_evaluation.py::test_python_rmse
0.01s call tests/unit/test_python_evaluation.py::test_python_rsquared
0.01s call tests/unit/test_python_evaluation.py::test_python_mae
0.01s call tests/unit/test_covid_utils.py::test_remove_nan
0.01s call tests/unit/test_sparse.py::test_df_to_sparse
(306 durations < 0.005s hidden. Use -vv to show these durations.)
======================================== 124 passed, 2 skipped, 92 deselected, 107 warnings in 12.96s ========================================
conda activate reco_base
pytest tests/unit -m "notebooks and not spark and not gpu" --durations 0 --disable-warnings
============================================================ test session starts =============================================================
platform linux -- Python 3.6.11, pytest-6.1.1, py-1.9.0, pluggy-0.13.1
rootdir: /home/recocat/notebooks/repos/recommenders
collected 218 items / 207 deselected / 11 selected
tests/unit/test_notebook_utils.py . [ 9%]
tests/unit/test_notebooks_python.py .......... [100%]
======================================== 11 passed, 207 deselected, 107 warnings in 502.59s (0:08:22) ========================================
conda activate reco_gpu
pytest tests/unit -m "not notebooks and not spark and gpu" --durations 0 --disable-warnings
============================================================ test session starts =============================================================
platform linux -- Python 3.6.11, pytest-6.1.1, py-1.9.0, pluggy-0.13.1
rootdir: /home/recocat/notebooks/repos/recommenders
collected 218 items / 165 deselected / 53 selected
tests/unit/test_deeprec_model.py .... [ 7%]
tests/unit/test_deeprec_utils.py ....... [ 20%]
tests/unit/test_gpu_utils.py ..s.... [ 33%]
tests/unit/test_ncf_singlenode.py .............. [ 60%]
tests/unit/test_newsrec_model.py .... [ 67%]
tests/unit/test_newsrec_utils.py ...... [ 79%]
tests/unit/test_notebooks_gpu.py . [ 81%]
tests/unit/test_rbm.py ... [ 86%]
tests/unit/test_tf_utils.py .... [ 94%]
tests/unit/test_wide_deep_utils.py ... [100%]
============================================================= slowest durations ==============================================================
301.59s call tests/unit/test_deeprec_model.py::test_slirec_component_definition
30.82s call tests/unit/test_notebooks_gpu.py::test_dkn_quickstart
9.35s call tests/unit/test_tf_utils.py::test_evaluation_log_hook
8.29s call tests/unit/test_newsrec_utils.py::test_naml_iterator
8.24s call tests/unit/test_newsrec_utils.py::test_news_iterator
6.31s call tests/unit/test_deeprec_model.py::test_lightgcn_component_definition
5.64s call tests/unit/test_newsrec_model.py::test_naml_component_definition
5.45s call tests/unit/test_newsrec_model.py::test_nrms_component_definition
4.78s call tests/unit/test_newsrec_model.py::test_lstur_component_definition
4.50s call tests/unit/test_tf_utils.py::test_pandas_input_fn_for_saved_model
4.47s call tests/unit/test_newsrec_model.py::test_npa_component_definition
4.44s call tests/unit/test_deeprec_utils.py::test_DKN_iterator
4.18s call tests/unit/test_deeprec_model.py::test_xdeepfm_component_definition
3.88s call tests/unit/test_deeprec_model.py::test_dkn_component_definition
3.69s call tests/unit/test_newsrec_utils.py::test_load_yaml_file
3.09s call tests/unit/test_newsrec_utils.py::test_prepare_hparams[wordEmb_file]
3.03s call tests/unit/test_wide_deep_utils.py::test_wide_deep_model
2.95s call tests/unit/test_newsrec_utils.py::test_prepare_hparams[userDict_file]
2.81s call tests/unit/test_newsrec_utils.py::test_prepare_hparams[wordDict_file]
2.68s call tests/unit/test_wide_deep_utils.py::test_deep_model
1.48s call tests/unit/test_ncf_singlenode.py::test_neumf_save_load[4-8]
1.48s call tests/unit/test_ncf_singlenode.py::test_neumf_save_load[5-5]
1.20s call tests/unit/test_wide_deep_utils.py::test_wide_model
1.16s call tests/unit/test_ncf_singlenode.py::test_regular_save_load[NeuMF-5-5]
0.97s call tests/unit/test_ncf_singlenode.py::test_regular_save_load[GMF-5-5]
0.90s call tests/unit/test_ncf_singlenode.py::test_regular_save_load[MLP-5-5]
0.86s call tests/unit/test_ncf_singlenode.py::test_predict[NeuMF]
0.83s call tests/unit/test_ncf_singlenode.py::test_fit[NeuMF]
0.82s call tests/unit/test_ncf_singlenode.py::test_predict[GMF]
0.75s call tests/unit/test_rbm.py::test_sampling_funct
0.65s call tests/unit/test_ncf_singlenode.py::test_predict[MLP]
0.64s call tests/unit/test_deeprec_utils.py::test_Sequential_Iterator
0.64s call tests/unit/test_ncf_singlenode.py::test_fit[MLP]
0.62s call tests/unit/test_rbm.py::test_train_param_init
0.56s call tests/unit/test_ncf_singlenode.py::test_fit[GMF]
0.48s call tests/unit/test_ncf_singlenode.py::test_init[NeuMF-1-1]
0.44s setup tests/unit/test_ncf_singlenode.py::test_fit[NeuMF]
0.36s call tests/unit/test_ncf_singlenode.py::test_init[MLP-4-8]
0.31s call tests/unit/test_ncf_singlenode.py::test_init[GMF-10-10]
0.20s call tests/unit/test_gpu_utils.py::test_get_cuda_version
0.19s call tests/unit/test_tf_utils.py::test_pandas_input_fn
0.09s call tests/unit/test_gpu_utils.py::test_get_cudnn_version
0.05s teardown tests/unit/test_newsrec_utils.py::test_naml_iterator
0.04s teardown tests/unit/test_newsrec_utils.py::test_news_iterator
0.02s teardown tests/unit/test_newsrec_utils.py::test_prepare_hparams[wordEmb_file]
0.02s teardown tests/unit/test_newsrec_model.py::test_lstur_component_definition
0.02s teardown tests/unit/test_newsrec_model.py::test_nrms_component_definition
0.02s teardown tests/unit/test_newsrec_model.py::test_npa_component_definition
0.02s teardown tests/unit/test_newsrec_model.py::test_naml_component_definition
0.02s teardown tests/unit/test_newsrec_utils.py::test_prepare_hparams[wordDict_file]
0.02s teardown tests/unit/test_newsrec_utils.py::test_load_yaml_file
0.02s teardown tests/unit/test_newsrec_utils.py::test_prepare_hparams[userDict_file]
0.01s call tests/unit/test_deeprec_utils.py::test_FFM_iterator
0.01s call tests/unit/test_deeprec_utils.py::test_prepare_hparams[FEATURE_COUNT]
0.01s call tests/unit/test_deeprec_utils.py::test_prepare_hparams[dim]
0.01s call tests/unit/test_deeprec_utils.py::test_prepare_hparams[data_format]
0.01s call tests/unit/test_deeprec_utils.py::test_load_yaml_file
0.01s call tests/unit/test_gpu_utils.py::test_tensorflow_gpu
(100 durations < 0.005s hidden. Use -vv to show these durations.)
================================== 52 passed, 1 skipped, 165 deselected, 2894 warnings in 439.55s (0:07:19) ==================================
conda activate reco_gpu
pytest tests/unit -m "notebooks and not spark and gpu" --durations 0 --disable-warnings
============================================================ test session starts =============================================================
platform linux -- Python 3.6.11, pytest-6.1.1, py-1.9.0, pluggy-0.13.1
rootdir: /home/recocat/notebooks/repos/recommenders
collected 218 items / 212 deselected / 6 selected
tests/unit/test_notebooks_gpu.py ...... [100%]
============================================================= slowest durations ==============================================================
201.38s call tests/unit/test_notebooks_gpu.py::test_wide_deep
101.64s call tests/unit/test_notebooks_gpu.py::test_ncf_deep_dive
64.90s call tests/unit/test_notebooks_gpu.py::test_xdeepfm
33.06s call tests/unit/test_notebooks_gpu.py::test_ncf
32.19s call tests/unit/test_notebooks_gpu.py::test_fastai
0.09s call tests/unit/test_notebooks_gpu.py::test_gpu_vm
0.01s teardown tests/unit/test_notebooks_gpu.py::test_wide_deep
(11 durations < 0.005s hidden. Use -vv to show these durations.)
======================================== 6 passed, 212 deselected, 105 warnings in 436.50s (0:07:16) =========================================
conda activate reco_pyspark
pytest tests/unit -m "not notebooks and spark and not gpu" --durations 0 --disable-warnings
=========================================================================== test session starts ===========================================================================
platform linux -- Python 3.6.11, pytest-6.1.1, py-1.9.0, pluggy-0.13.1
rootdir: /home/recocat/notebooks/repos/recommenders
collected 218 items / 202 deselected / 16 selected
tests/unit/test_spark_evaluation.py ........... [ 68%]
tests/unit/test_spark_splitter.py ..... [100%]
============================================================================ slowest durations ============================================================================
3.18s call tests/unit/test_spark_splitter.py::test_chrono_splitter
2.85s setup tests/unit/test_spark_evaluation.py::test_init_spark
2.09s call tests/unit/test_spark_evaluation.py::test_init_spark_rating_eval
1.91s call tests/unit/test_spark_evaluation.py::test_spark_python_match
1.89s call tests/unit/test_spark_evaluation.py::test_spark_precision
1.69s call tests/unit/test_spark_splitter.py::test_stratified_splitter
1.60s call tests/unit/test_spark_splitter.py::test_timestamp_splitter
1.51s call tests/unit/test_spark_evaluation.py::test_spark_recall
1.39s call tests/unit/test_spark_evaluation.py::test_spark_exp_var
1.33s call tests/unit/test_spark_evaluation.py::test_spark_rmse
1.20s setup tests/unit/test_spark_evaluation.py::test_init_spark_rating_eval
1.10s call tests/unit/test_spark_evaluation.py::test_spark_ndcg
1.07s call tests/unit/test_spark_evaluation.py::test_spark_map
1.00s teardown tests/unit/test_spark_splitter.py::test_timestamp_splitter
0.87s call tests/unit/test_spark_evaluation.py::test_spark_mae
0.76s call tests/unit/test_spark_evaluation.py::test_spark_rsquared
0.64s call tests/unit/test_spark_splitter.py::test_min_rating_filter
0.59s call tests/unit/test_spark_splitter.py::test_random_splitter
0.15s setup tests/unit/test_spark_splitter.py::test_min_rating_filter
(29 durations < 0.005s hidden. Use -vv to show these durations.)
============================================================ 16 passed, 202 deselected, 105 warnings in 30.16s ============================================================
conda activate reco_pyspark
pytest tests/unit -m "notebooks and spark and not gpu" --durations 0 --disable-warnings
=========================================================================== test session starts ===========================================================================
platform linux -- Python 3.6.11, pytest-6.1.1, py-1.9.0, pluggy-0.13.1
rootdir: /home/recocat/notebooks/repos/recommenders
collected 218 items / 212 deselected / 6 selected
tests/unit/test_notebooks_pyspark.py .....F [100%]
================================================================================ FAILURES =================================================================================
___________________________________________________________________ test_mmlspark_lightgbm_criteo_runs ____________________________________________________________________
notebooks = {'als_deep_dive': '/home/recocat/notebooks/repos/recommenders/examples/02_model_collaborative_filtering/als_deep_dive....'/home/recocat/notebooks/repos/recommenders/examples/02_model_collaborative_filtering/cornac_bpr_deep_dive.ipynb', ...}
@pytest.mark.notebooks
@pytest.mark.spark
@pytest.mark.skipif(sys.platform == "win32", reason="Not implemented on Windows")
def test_mmlspark_lightgbm_criteo_runs(notebooks):
notebook_path = notebooks["mmlspark_lightgbm_criteo"]
pm.execute_notebook(
notebook_path,
OUTPUT_NOTEBOOK,
kernel_name=KERNEL_NAME,
> parameters=dict(DATA_SIZE="sample", NUM_ITERATIONS=10, EARLY_STOPPING_ROUND=2),
)
tests/unit/test_notebooks_pyspark.py:77:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
/anaconda/envs/reco_pyspark/lib/python3.6/site-packages/papermill/execute.py:100: in execute_notebook
raise_for_execution_errors(nb, output_path)
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
nb = {'cells': [{'cell_type': 'code', 'metadata': {'inputHidden': True, 'hide_input': True}, 'execution_count': None, 'sour...end_time': '2020-10-16T13:26:20.688898', 'duration': 37.69038, 'exception': True}}, 'nbformat': 4, 'nbformat_minor': 2}
output_path = 'output.ipynb'
def raise_for_execution_errors(nb, output_path):
"""Assigned parameters into the appropriate place in the input notebook
Parameters
----------
nb : NotebookNode
Executable notebook object
output_path : str
Path to write executed notebook
"""
error = None
for cell in nb.cells:
if cell.get("outputs") is None:
continue
for output in cell.outputs:
if output.output_type == "error":
error = PapermillExecutionError(
exec_count=cell.execution_count,
source=cell.source,
ename=output.ename,
evalue=output.evalue,
traceback=output.traceback,
)
break
if error:
# Write notebook back out with the Error Message at the top of the Notebook.
error_msg = ERROR_MESSAGE_TEMPLATE % str(error.exec_count)
error_msg_cell = nbformat.v4.new_code_cell(
source="%%html\n" + error_msg,
outputs=[
nbformat.v4.new_output(output_type="display_data", data={"text/html": error_msg})
],
metadata={"inputHidden": True, "hide_input": True},
)
nb.cells = [error_msg_cell] + nb.cells
write_ipynb(nb, output_path)
> raise error
E papermill.exceptions.PapermillExecutionError:
E ---------------------------------------------------------------------------
E Exception encountered at "In [9]":
E ---------------------------------------------------------------------------
E Py4JJavaError Traceback (most recent call last)
E <ipython-input-9-2c2d97ba8c1c> in <module>
E ----> 1 model = lgbm.fit(train)
E
E /anaconda/envs/reco_pyspark/lib/python3.6/site-packages/pyspark/ml/base.py in fit(self, dataset, params)
E 130 return self.copy(params)._fit(dataset)
E 131 else:
E --> 132 return self._fit(dataset)
E 133 else:
E 134 raise ValueError("Params must be either a param map or a list/tuple of param maps, "
E
E /anaconda/envs/reco_pyspark/lib/python3.6/site-packages/pyspark/ml/wrapper.py in _fit(self, dataset)
E 293
E 294 def _fit(self, dataset):
E --> 295 java_model = self._fit_java(dataset)
E 296 model = self._create_model(java_model)
E 297 return self._copyValues(model)
E
E /anaconda/envs/reco_pyspark/lib/python3.6/site-packages/pyspark/ml/wrapper.py in _fit_java(self, dataset)
E 290 """
E 291 self._transfer_params_to_java()
E --> 292 return self._java_obj.fit(dataset._jdf)
E 293
E 294 def _fit(self, dataset):
E
E /anaconda/envs/reco_pyspark/lib/python3.6/site-packages/py4j/java_gateway.py in __call__(self, *args)
E 1255 answer = self.gateway_client.send_command(command)
E 1256 return_value = get_return_value(
E -> 1257 answer, self.gateway_client, self.target_id, self.name)
E 1258
E 1259 for temp_arg in temp_args:
E
E /anaconda/envs/reco_pyspark/lib/python3.6/site-packages/pyspark/sql/utils.py in deco(*a, **kw)
E 61 def deco(*a, **kw):
E 62 try:
E ---> 63 return f(*a, **kw)
E 64 except py4j.protocol.Py4JJavaError as e:
E 65 s = e.java_exception.toString()
E
E /anaconda/envs/reco_pyspark/lib/python3.6/site-packages/py4j/protocol.py in get_return_value(answer, gateway_client, target_id, name)
E 326 raise Py4JJavaError(
E 327 "An error occurred while calling {0}{1}{2}.\n".
E --> 328 format(target_id, ".", name), value)
E 329 else:
E 330 raise Py4JError(
E
E Py4JJavaError: An error occurred while calling o106.fit.
E : org.apache.spark.SparkException: Job aborted due to stage failure: Task 1 in stage 6.0 failed 1 times, most recent failure: Lost task 1.0 in stage 6.0 (TID 22, localhost, executor driver): java.lang.UnsatisfiedLinkError: com.microsoft.ml.lightgbm.lightgbmlibJNI.LGBM_NetworkFree()I
E at com.microsoft.ml.lightgbm.lightgbmlibJNI.LGBM_NetworkFree(Native Method)
E at com.microsoft.ml.lightgbm.lightgbmlib.LGBM_NetworkFree(lightgbmlib.java:209)
E at com.microsoft.ml.spark.lightgbm.TrainUtils$.trainLightGBM(TrainUtils.scala:415)
E at com.microsoft.ml.spark.lightgbm.LightGBMBase$$anonfun$6.apply(LightGBMBase.scala:85)
E at com.microsoft.ml.spark.lightgbm.LightGBMBase$$anonfun$6.apply(LightGBMBase.scala:85)
E at org.apache.spark.sql.execution.MapPartitionsExec$$anonfun$5.apply(objects.scala:188)
E at org.apache.spark.sql.execution.MapPartitionsExec$$anonfun$5.apply(objects.scala:185)
E at org.apache.spark.rdd.RDD$$anonfun$mapPartitionsInternal$1$$anonfun$apply$24.apply(RDD.scala:858)
E at org.apache.spark.rdd.RDD$$anonfun$mapPartitionsInternal$1$$anonfun$apply$24.apply(RDD.scala:858)
E at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:52)
E at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:346)
E at org.apache.spark.rdd.RDD.iterator(RDD.scala:310)
E at org.apache.spark.sql.execution.SQLExecutionRDD.compute(SQLExecutionRDD.scala:55)
E at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:346)
E at org.apache.spark.rdd.RDD.iterator(RDD.scala:310)
E at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:52)
E at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:346)
E at org.apache.spark.rdd.RDD.iterator(RDD.scala:310)
E at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90)
E at org.apache.spark.scheduler.Task.run(Task.scala:123)
E at org.apache.spark.executor.Executor$TaskRunner$$anonfun$10.apply(Executor.scala:408)
E at org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1360)
E at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:414)
E at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
E at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
E at java.lang.Thread.run(Thread.java:748)
E
E Driver stacktrace:
E at org.apache.spark.scheduler.DAGScheduler.org$apache$spark$scheduler$DAGScheduler$$failJobAndIndependentStages(DAGScheduler.scala:1891)
E at org.apache.spark.scheduler.DAGScheduler$$anonfun$abortStage$1.apply(DAGScheduler.scala:1879)
E at org.apache.spark.scheduler.DAGScheduler$$anonfun$abortStage$1.apply(DAGScheduler.scala:1878)
E at scala.collection.mutable.ResizableArray$class.foreach(ResizableArray.scala:59)
E at scala.collection.mutable.ArrayBuffer.foreach(ArrayBuffer.scala:48)
E at org.apache.spark.scheduler.DAGScheduler.abortStage(DAGScheduler.scala:1878)
E at org.apache.spark.scheduler.DAGScheduler$$anonfun$handleTaskSetFailed$1.apply(DAGScheduler.scala:927)
E at org.apache.spark.scheduler.DAGScheduler$$anonfun$handleTaskSetFailed$1.apply(DAGScheduler.scala:927)
E at scala.Option.foreach(Option.scala:257)
E at org.apache.spark.scheduler.DAGScheduler.handleTaskSetFailed(DAGScheduler.scala:927)
E at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.doOnReceive(DAGScheduler.scala:2112)
E at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:2061)
E at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:2050)
E at org.apache.spark.util.EventLoop$$anon$1.run(EventLoop.scala:49)
E at org.apache.spark.scheduler.DAGScheduler.runJob(DAGScheduler.scala:738)
E at org.apache.spark.SparkContext.runJob(SparkContext.scala:2061)
E at org.apache.spark.SparkContext.runJob(SparkContext.scala:2158)
E at org.apache.spark.rdd.RDD$$anonfun$reduce$1.apply(RDD.scala:1080)
E at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:151)
E at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:112)
E at org.apache.spark.rdd.RDD.withScope(RDD.scala:385)
E at org.apache.spark.rdd.RDD.reduce(RDD.scala:1062)
E at org.apache.spark.sql.Dataset$$anonfun$reduce$1.apply(Dataset.scala:1643)
E at org.apache.spark.sql.Dataset$$anonfun$withNewRDDExecutionId$1.apply(Dataset.scala:3355)
E at org.apache.spark.sql.execution.SQLExecution$$anonfun$withNewExecutionId$1.apply(SQLExecution.scala:80)
E at org.apache.spark.sql.execution.SQLExecution$.withSQLConfPropagated(SQLExecution.scala:127)
E at org.apache.spark.sql.execution.SQLExecution$.withNewExecutionId(SQLExecution.scala:75)
E at org.apache.spark.sql.Dataset.withNewRDDExecutionId(Dataset.scala:3351)
E at org.apache.spark.sql.Dataset.reduce(Dataset.scala:1642)
E at com.microsoft.ml.spark.lightgbm.LightGBMBase$class.innerTrain(LightGBMBase.scala:90)
E at com.microsoft.ml.spark.lightgbm.LightGBMClassifier.innerTrain(LightGBMClassifier.scala:26)
E at com.microsoft.ml.spark.lightgbm.LightGBMBase$class.train(LightGBMBase.scala:38)
E at com.microsoft.ml.spark.lightgbm.LightGBMClassifier.train(LightGBMClassifier.scala:26)
E at com.microsoft.ml.spark.lightgbm.LightGBMClassifier.train(LightGBMClassifier.scala:26)
E at org.apache.spark.ml.Predictor.fit(Predictor.scala:118)
E at org.apache.spark.ml.Predictor.fit(Predictor.scala:82)
E at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
E at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
E at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
E at java.lang.reflect.Method.invoke(Method.java:498)
E at py4j.reflection.MethodInvoker.invoke(MethodInvoker.java:244)
E at py4j.reflection.ReflectionEngine.invoke(ReflectionEngine.java:357)
E at py4j.Gateway.invoke(Gateway.java:282)
E at py4j.commands.AbstractCommand.invokeMethod(AbstractCommand.java:132)
E at py4j.commands.CallCommand.execute(CallCommand.java:79)
E at py4j.GatewayConnection.run(GatewayConnection.java:238)
E at java.lang.Thread.run(Thread.java:748)
E Caused by: java.lang.UnsatisfiedLinkError: com.microsoft.ml.lightgbm.lightgbmlibJNI.LGBM_NetworkFree()I
E at com.microsoft.ml.lightgbm.lightgbmlibJNI.LGBM_NetworkFree(Native Method)
E at com.microsoft.ml.lightgbm.lightgbmlib.LGBM_NetworkFree(lightgbmlib.java:209)
E at com.microsoft.ml.spark.lightgbm.TrainUtils$.trainLightGBM(TrainUtils.scala:415)
E at com.microsoft.ml.spark.lightgbm.LightGBMBase$$anonfun$6.apply(LightGBMBase.scala:85)
E at com.microsoft.ml.spark.lightgbm.LightGBMBase$$anonfun$6.apply(LightGBMBase.scala:85)
E at org.apache.spark.sql.execution.MapPartitionsExec$$anonfun$5.apply(objects.scala:188)
E at org.apache.spark.sql.execution.MapPartitionsExec$$anonfun$5.apply(objects.scala:185)
E at org.apache.spark.rdd.RDD$$anonfun$mapPartitionsInternal$1$$anonfun$apply$24.apply(RDD.scala:858)
E at org.apache.spark.rdd.RDD$$anonfun$mapPartitionsInternal$1$$anonfun$apply$24.apply(RDD.scala:858)
E at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:52)
E at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:346)
E at org.apache.spark.rdd.RDD.iterator(RDD.scala:310)
E at org.apache.spark.sql.execution.SQLExecutionRDD.compute(SQLExecutionRDD.scala:55)
E at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:346)
E at org.apache.spark.rdd.RDD.iterator(RDD.scala:310)
E at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:52)
E at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:346)
E at org.apache.spark.rdd.RDD.iterator(RDD.scala:310)
E at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90)
E at org.apache.spark.scheduler.Task.run(Task.scala:123)
E at org.apache.spark.executor.Executor$TaskRunner$$anonfun$10.apply(Executor.scala:408)
E at org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1360)
E at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:414)
E at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
E at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
E ... 1 more
/anaconda/envs/reco_pyspark/lib/python3.6/site-packages/papermill/execute.py:248: PapermillExecutionError
-------------------------------------------------------------------------- Captured stderr call ---------------------------------------------------------------------------
4%|▍ | 1/24 [00:00<00:17, 1.34it/s]https://mvnrepository.com/artifact added as a remote repository with the name: repo-1
Ivy Default Cache set to: /home/recocat/.ivy2/cache
The jars for the packages stored in: /home/recocat/.ivy2/jars
:: loading settings :: url = jar:file:/dsvm/tools/spark/spark-2.4.5/jars/ivy-2.4.0.jar!/org/apache/ivy/core/settings/ivysettings.xml
com.microsoft.ml.spark#mmlspark_2.11 added as a dependency
:: resolving dependencies :: org.apache.spark#spark-submit-parent-97bd8959-5b55-44b3-8b5a-ef445f8ed0f1;1.0
confs: [default]
found com.microsoft.ml.spark#mmlspark_2.11;0.18.1 in central
found org.scalactic#scalactic_2.11;3.0.5 in central
found org.scala-lang#scala-reflect;2.11.12 in central
found org.scalatest#scalatest_2.11;3.0.5 in central
found org.scala-lang.modules#scala-xml_2.11;1.0.6 in central
found io.spray#spray-json_2.11;1.3.2 in central
found com.microsoft.cntk#cntk;2.4 in central
found org.openpnp#opencv;3.2.0-1 in central
found com.jcraft#jsch;0.1.54 in central
found org.apache.httpcomponents#httpclient;4.5.6 in central
found org.apache.httpcomponents#httpcore;4.4.10 in central
found commons-logging#commons-logging;1.2 in central
found commons-codec#commons-codec;1.10 in central
found com.microsoft.ml.lightgbm#lightgbmlib;2.2.350 in central
found com.github.vowpalwabbit#vw-jni;8.7.0.2 in central
downloading https://repo1.maven.org/maven2/com/microsoft/ml/spark/mmlspark_2.11/0.18.1/mmlspark_2.11-0.18.1.jar ...
[SUCCESSFUL ] com.microsoft.ml.spark#mmlspark_2.11;0.18.1!mmlspark_2.11.jar (122ms)
downloading https://repo1.maven.org/maven2/org/scalactic/scalactic_2.11/3.0.5/scalactic_2.11-3.0.5.jar ...
[SUCCESSFUL ] org.scalactic#scalactic_2.11;3.0.5!scalactic_2.11.jar(bundle) (24ms)
downloading https://repo1.maven.org/maven2/org/scalatest/scalatest_2.11/3.0.5/scalatest_2.11-3.0.5.jar ...
[SUCCESSFUL ] org.scalatest#scalatest_2.11;3.0.5!scalatest_2.11.jar(bundle) (203ms)
downloading https://repo1.maven.org/maven2/io/spray/spray-json_2.11/1.3.2/spray-json_2.11-1.3.2.jar ...
[SUCCESSFUL ] io.spray#spray-json_2.11;1.3.2!spray-json_2.11.jar(bundle) (11ms)
downloading https://repo1.maven.org/maven2/com/microsoft/cntk/cntk/2.4/cntk-2.4.jar ...
[SUCCESSFUL ] com.microsoft.cntk#cntk;2.4!cntk.jar (1308ms)
downloading https://repo1.maven.org/maven2/org/openpnp/opencv/3.2.0-1/opencv-3.2.0-1.jar ...
[SUCCESSFUL ] org.openpnp#opencv;3.2.0-1!opencv.jar(bundle) (405ms)
downloading https://repo1.maven.org/maven2/com/jcraft/jsch/0.1.54/jsch-0.1.54.jar ...
[SUCCESSFUL ] com.jcraft#jsch;0.1.54!jsch.jar (9ms)
downloading https://repo1.maven.org/maven2/org/apache/httpcomponents/httpclient/4.5.6/httpclient-4.5.6.jar ...
[SUCCESSFUL ] org.apache.httpcomponents#httpclient;4.5.6!httpclient.jar (12ms)
downloading https://repo1.maven.org/maven2/com/microsoft/ml/lightgbm/lightgbmlib/2.2.350/lightgbmlib-2.2.350.jar ...
[SUCCESSFUL ] com.microsoft.ml.lightgbm#lightgbmlib;2.2.350!lightgbmlib.jar (24ms)
downloading https://repo1.maven.org/maven2/com/github/vowpalwabbit/vw-jni/8.7.0.2/vw-jni-8.7.0.2.jar ...
[SUCCESSFUL ] com.github.vowpalwabbit#vw-jni;8.7.0.2!vw-jni.jar (36ms)
downloading https://repo1.maven.org/maven2/org/scala-lang/scala-reflect/2.11.12/scala-reflect-2.11.12.jar ...
[SUCCESSFUL ] org.scala-lang#scala-reflect;2.11.12!scala-reflect.jar (36ms)
downloading https://repo1.maven.org/maven2/org/scala-lang/modules/scala-xml_2.11/1.0.6/scala-xml_2.11-1.0.6.jar ...
[SUCCESSFUL ] org.scala-lang.modules#scala-xml_2.11;1.0.6!scala-xml_2.11.jar(bundle) (13ms)
downloading https://repo1.maven.org/maven2/org/apache/httpcomponents/httpcore/4.4.10/httpcore-4.4.10.jar ...
[SUCCESSFUL ] org.apache.httpcomponents#httpcore;4.4.10!httpcore.jar (12ms)
downloading https://repo1.maven.org/maven2/commons-logging/commons-logging/1.2/commons-logging-1.2.jar ...
[SUCCESSFUL ] commons-logging#commons-logging;1.2!commons-logging.jar (10ms)
downloading https://repo1.maven.org/maven2/commons-codec/commons-codec/1.10/commons-codec-1.10.jar ...
[SUCCESSFUL ] commons-codec#commons-codec;1.10!commons-codec.jar (13ms)
:: resolution report :: resolve 3403ms :: artifacts dl 2243ms
:: modules in use:
com.github.vowpalwabbit#vw-jni;8.7.0.2 from central in [default]
com.jcraft#jsch;0.1.54 from central in [default]
com.microsoft.cntk#cntk;2.4 from central in [default]
com.microsoft.ml.lightgbm#lightgbmlib;2.2.350 from central in [default]
com.microsoft.ml.spark#mmlspark_2.11;0.18.1 from central in [default]
commons-codec#commons-codec;1.10 from central in [default]
commons-logging#commons-logging;1.2 from central in [default]
io.spray#spray-json_2.11;1.3.2 from central in [default]
org.apache.httpcomponents#httpclient;4.5.6 from central in [default]
org.apache.httpcomponents#httpcore;4.4.10 from central in [default]
org.openpnp#opencv;3.2.0-1 from central in [default]
org.scala-lang#scala-reflect;2.11.12 from central in [default]
org.scala-lang.modules#scala-xml_2.11;1.0.6 from central in [default]
org.scalactic#scalactic_2.11;3.0.5 from central in [default]
org.scalatest#scalatest_2.11;3.0.5 from central in [default]
---------------------------------------------------------------------
| | modules || artifacts |
| conf | number| search|dwnlded|evicted|| number|dwnlded|
---------------------------------------------------------------------
| default | 15 | 15 | 15 | 0 || 15 | 15 |
---------------------------------------------------------------------
:: retrieving :: org.apache.spark#spark-submit-parent-97bd8959-5b55-44b3-8b5a-ef445f8ed0f1
confs: [default]
15 artifacts copied, 0 already retrieved (288549kB/287ms)
20/10/16 13:25:51 WARN NativeCodeLoader: Unable to load native-hadoop library for your platform... using builtin-java classes where applicable
Using Spark's default log4j profile: org/apache/spark/log4j-defaults.properties
Setting default log level to "WARN".
To adjust logging level use sc.setLogLevel(newLevel). For SparkR, use setLogLevel(newLevel).
33%|███▎ | 8/24 [00:11<00:14, 1.09it/s]20/10/16 13:25:57 WARN Utils: Truncated the string representation of a plan since it was too large. This behavior can be adjusted by setting 'spark.debug.maxToStringFields' in SparkEnv.conf.
67%|██████▋ | 16/24 [00:31<00:09, 1.17s/it]20/10/16 13:26:19 ERROR Executor: Exception in task 5.0 in stage 6.0 (TID 26)
java.lang.UnsatisfiedLinkError: com.microsoft.ml.lightgbm.lightgbmlibJNI.LGBM_NetworkFree()I
at com.microsoft.ml.lightgbm.lightgbmlibJNI.LGBM_NetworkFree(Native Method)
at com.microsoft.ml.lightgbm.lightgbmlib.LGBM_NetworkFree(lightgbmlib.java:209)
at com.microsoft.ml.spark.lightgbm.TrainUtils$.trainLightGBM(TrainUtils.scala:415)
at com.microsoft.ml.spark.lightgbm.LightGBMBase$$anonfun$6.apply(LightGBMBase.scala:85)
at com.microsoft.ml.spark.lightgbm.LightGBMBase$$anonfun$6.apply(LightGBMBase.scala:85)
at org.apache.spark.sql.execution.MapPartitionsExec$$anonfun$5.apply(objects.scala:188)
at org.apache.spark.sql.execution.MapPartitionsExec$$anonfun$5.apply(objects.scala:185)
at org.apache.spark.rdd.RDD$$anonfun$mapPartitionsInternal$1$$anonfun$apply$24.apply(RDD.scala:858)
at org.apache.spark.rdd.RDD$$anonfun$mapPartitionsInternal$1$$anonfun$apply$24.apply(RDD.scala:858)
at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:52)
at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:346)
at org.apache.spark.rdd.RDD.iterator(RDD.scala:310)
at org.apache.spark.sql.execution.SQLExecutionRDD.compute(SQLExecutionRDD.scala:55)
at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:346)
at org.apache.spark.rdd.RDD.iterator(RDD.scala:310)
at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:52)
at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:346)
at org.apache.spark.rdd.RDD.iterator(RDD.scala:310)
at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90)
at org.apache.spark.scheduler.Task.run(Task.scala:123)
at org.apache.spark.executor.Executor$TaskRunner$$anonfun$10.apply(Executor.scala:408)
at org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1360)
at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:414)
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
at java.lang.Thread.run(Thread.java:748)
20/10/16 13:26:19 ERROR Executor: Exception in task 4.0 in stage 6.0 (TID 25)
java.lang.UnsatisfiedLinkError: com.microsoft.ml.lightgbm.lightgbmlibJNI.LGBM_NetworkFree()I
at com.microsoft.ml.lightgbm.lightgbmlibJNI.LGBM_NetworkFree(Native Method)
at com.microsoft.ml.lightgbm.lightgbmlib.LGBM_NetworkFree(lightgbmlib.java:209)
at com.microsoft.ml.spark.lightgbm.TrainUtils$.trainLightGBM(TrainUtils.scala:415)
at com.microsoft.ml.spark.lightgbm.LightGBMBase$$anonfun$6.apply(LightGBMBase.scala:85)
at com.microsoft.ml.spark.lightgbm.LightGBMBase$$anonfun$6.apply(LightGBMBase.scala:85)
at org.apache.spark.sql.execution.MapPartitionsExec$$anonfun$5.apply(objects.scala:188)
at org.apache.spark.sql.execution.MapPartitionsExec$$anonfun$5.apply(objects.scala:185)
at org.apache.spark.rdd.RDD$$anonfun$mapPartitionsInternal$1$$anonfun$apply$24.apply(RDD.scala:858)
at org.apache.spark.rdd.RDD$$anonfun$mapPartitionsInternal$1$$anonfun$apply$24.apply(RDD.scala:858)
at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:52)
at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:346)
at org.apache.spark.rdd.RDD.iterator(RDD.scala:310)
at org.apache.spark.sql.execution.SQLExecutionRDD.compute(SQLExecutionRDD.scala:55)
at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:346)
at org.apache.spark.rdd.RDD.iterator(RDD.scala:310)
at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:52)
at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:346)
at org.apache.spark.rdd.RDD.iterator(RDD.scala:310)
at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90)
at org.apache.spark.scheduler.Task.run(Task.scala:123)
at org.apache.spark.executor.Executor$TaskRunner$$anonfun$10.apply(Executor.scala:408)
at org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1360)
at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:414)
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
at java.lang.Thread.run(Thread.java:748)
20/10/16 13:26:19 ERROR Executor: Exception in task 2.0 in stage 6.0 (TID 23)
java.lang.UnsatisfiedLinkError: com.microsoft.ml.lightgbm.lightgbmlibJNI.LGBM_NetworkFree()I
at com.microsoft.ml.lightgbm.lightgbmlibJNI.LGBM_NetworkFree(Native Method)
at com.microsoft.ml.lightgbm.lightgbmlib.LGBM_NetworkFree(lightgbmlib.java:209)
at com.microsoft.ml.spark.lightgbm.TrainUtils$.trainLightGBM(TrainUtils.scala:415)
at com.microsoft.ml.spark.lightgbm.LightGBMBase$$anonfun$6.apply(LightGBMBase.scala:85)
at com.microsoft.ml.spark.lightgbm.LightGBMBase$$anonfun$6.apply(LightGBMBase.scala:85)
at org.apache.spark.sql.execution.MapPartitionsExec$$anonfun$5.apply(objects.scala:188)
at org.apache.spark.sql.execution.MapPartitionsExec$$anonfun$5.apply(objects.scala:185)
at org.apache.spark.rdd.RDD$$anonfun$mapPartitionsInternal$1$$anonfun$apply$24.apply(RDD.scala:858)
at org.apache.spark.rdd.RDD$$anonfun$mapPartitionsInternal$1$$anonfun$apply$24.apply(RDD.scala:858)
at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:52)
at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:346)
at org.apache.spark.rdd.RDD.iterator(RDD.scala:310)
at org.apache.spark.sql.execution.SQLExecutionRDD.compute(SQLExecutionRDD.scala:55)
at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:346)
at org.apache.spark.rdd.RDD.iterator(RDD.scala:310)
at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:52)
at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:346)
at org.apache.spark.rdd.RDD.iterator(RDD.scala:310)
at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90)
at org.apache.spark.scheduler.Task.run(Task.scala:123)
at org.apache.spark.executor.Executor$TaskRunner$$anonfun$10.apply(Executor.scala:408)
at org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1360)
at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:414)
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
at java.lang.Thread.run(Thread.java:748)
20/10/16 13:26:19 ERROR Executor: Exception in task 0.0 in stage 6.0 (TID 21)
java.lang.UnsatisfiedLinkError: com.microsoft.ml.lightgbm.lightgbmlibJNI.LGBM_NetworkFree()I
at com.microsoft.ml.lightgbm.lightgbmlibJNI.LGBM_NetworkFree(Native Method)
at com.microsoft.ml.lightgbm.lightgbmlib.LGBM_NetworkFree(lightgbmlib.java:209)
at com.microsoft.ml.spark.lightgbm.TrainUtils$.trainLightGBM(TrainUtils.scala:415)
at com.microsoft.ml.spark.lightgbm.LightGBMBase$$anonfun$6.apply(LightGBMBase.scala:85)
at com.microsoft.ml.spark.lightgbm.LightGBMBase$$anonfun$6.apply(LightGBMBase.scala:85)
at org.apache.spark.sql.execution.MapPartitionsExec$$anonfun$5.apply(objects.scala:188)
at org.apache.spark.sql.execution.MapPartitionsExec$$anonfun$5.apply(objects.scala:185)
at org.apache.spark.rdd.RDD$$anonfun$mapPartitionsInternal$1$$anonfun$apply$24.apply(RDD.scala:858)
at org.apache.spark.rdd.RDD$$anonfun$mapPartitionsInternal$1$$anonfun$apply$24.apply(RDD.scala:858)
at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:52)
at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:346)
at org.apache.spark.rdd.RDD.iterator(RDD.scala:310)
at org.apache.spark.sql.execution.SQLExecutionRDD.compute(SQLExecutionRDD.scala:55)
at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:346)
at org.apache.spark.rdd.RDD.iterator(RDD.scala:310)
at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:52)
at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:346)
at org.apache.spark.rdd.RDD.iterator(RDD.scala:310)
at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90)
at org.apache.spark.scheduler.Task.run(Task.scala:123)
at org.apache.spark.executor.Executor$TaskRunner$$anonfun$10.apply(Executor.scala:408)
at org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1360)
at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:414)
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
at java.lang.Thread.run(Thread.java:748)
20/10/16 13:26:19 ERROR Executor: Exception in task 1.0 in stage 6.0 (TID 22)
java.lang.UnsatisfiedLinkError: com.microsoft.ml.lightgbm.lightgbmlibJNI.LGBM_NetworkFree()I
at com.microsoft.ml.lightgbm.lightgbmlibJNI.LGBM_NetworkFree(Native Method)
at com.microsoft.ml.lightgbm.lightgbmlib.LGBM_NetworkFree(lightgbmlib.java:209)
at com.microsoft.ml.spark.lightgbm.TrainUtils$.trainLightGBM(TrainUtils.scala:415)
at com.microsoft.ml.spark.lightgbm.LightGBMBase$$anonfun$6.apply(LightGBMBase.scala:85)
at com.microsoft.ml.spark.lightgbm.LightGBMBase$$anonfun$6.apply(LightGBMBase.scala:85)
at org.apache.spark.sql.execution.MapPartitionsExec$$anonfun$5.apply(objects.scala:188)
at org.apache.spark.sql.execution.MapPartitionsExec$$anonfun$5.apply(objects.scala:185)
at org.apache.spark.rdd.RDD$$anonfun$mapPartitionsInternal$1$$anonfun$apply$24.apply(RDD.scala:858)
at org.apache.spark.rdd.RDD$$anonfun$mapPartitionsInternal$1$$anonfun$apply$24.apply(RDD.scala:858)
at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:52)
at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:346)
at org.apache.spark.rdd.RDD.iterator(RDD.scala:310)
at org.apache.spark.sql.execution.SQLExecutionRDD.compute(SQLExecutionRDD.scala:55)
at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:346)
at org.apache.spark.rdd.RDD.iterator(RDD.scala:310)
at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:52)
at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:346)
at org.apache.spark.rdd.RDD.iterator(RDD.scala:310)
at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90)
at org.apache.spark.scheduler.Task.run(Task.scala:123)
at org.apache.spark.executor.Executor$TaskRunner$$anonfun$10.apply(Executor.scala:408)
at org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1360)
at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:414)
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
at java.lang.Thread.run(Thread.java:748)
20/10/16 13:26:20 ERROR Executor: Exception in task 3.0 in stage 6.0 (TID 24)
java.lang.UnsatisfiedLinkError: com.microsoft.ml.lightgbm.lightgbmlibJNI.LGBM_NetworkFree()I
at com.microsoft.ml.lightgbm.lightgbmlibJNI.LGBM_NetworkFree(Native Method)
at com.microsoft.ml.lightgbm.lightgbmlib.LGBM_NetworkFree(lightgbmlib.java:209)
at com.microsoft.ml.spark.lightgbm.TrainUtils$.trainLightGBM(TrainUtils.scala:415)
at com.microsoft.ml.spark.lightgbm.LightGBMBase$$anonfun$6.apply(LightGBMBase.scala:85)
at com.microsoft.ml.spark.lightgbm.LightGBMBase$$anonfun$6.apply(LightGBMBase.scala:85)
at org.apache.spark.sql.execution.MapPartitionsExec$$anonfun$5.apply(objects.scala:188)
at org.apache.spark.sql.execution.MapPartitionsExec$$anonfun$5.apply(objects.scala:185)
at org.apache.spark.rdd.RDD$$anonfun$mapPartitionsInternal$1$$anonfun$apply$24.apply(RDD.scala:858)
at org.apache.spark.rdd.RDD$$anonfun$mapPartitionsInternal$1$$anonfun$apply$24.apply(RDD.scala:858)
at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:52)
at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:346)
at org.apache.spark.rdd.RDD.iterator(RDD.scala:310)
at org.apache.spark.sql.execution.SQLExecutionRDD.compute(SQLExecutionRDD.scala:55)
at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:346)
at org.apache.spark.rdd.RDD.iterator(RDD.scala:310)
at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:52)
at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:346)
at org.apache.spark.rdd.RDD.iterator(RDD.scala:310)
at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90)
at org.apache.spark.scheduler.Task.run(Task.scala:123)
at org.apache.spark.executor.Executor$TaskRunner$$anonfun$10.apply(Executor.scala:408)
at org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1360)
at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:414)
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
at java.lang.Thread.run(Thread.java:748)
20/10/16 13:26:20 WARN TaskSetManager: Lost task 1.0 in stage 6.0 (TID 22, localhost, executor driver): java.lang.UnsatisfiedLinkError: com.microsoft.ml.lightgbm.lightgbmlibJNI.LGBM_NetworkFree()I
at com.microsoft.ml.lightgbm.lightgbmlibJNI.LGBM_NetworkFree(Native Method)
at com.microsoft.ml.lightgbm.lightgbmlib.LGBM_NetworkFree(lightgbmlib.java:209)
at com.microsoft.ml.spark.lightgbm.TrainUtils$.trainLightGBM(TrainUtils.scala:415)
at com.microsoft.ml.spark.lightgbm.LightGBMBase$$anonfun$6.apply(LightGBMBase.scala:85)
at com.microsoft.ml.spark.lightgbm.LightGBMBase$$anonfun$6.apply(LightGBMBase.scala:85)
at org.apache.spark.sql.execution.MapPartitionsExec$$anonfun$5.apply(objects.scala:188)
at org.apache.spark.sql.execution.MapPartitionsExec$$anonfun$5.apply(objects.scala:185)
at org.apache.spark.rdd.RDD$$anonfun$mapPartitionsInternal$1$$anonfun$apply$24.apply(RDD.scala:858)
at org.apache.spark.rdd.RDD$$anonfun$mapPartitionsInternal$1$$anonfun$apply$24.apply(RDD.scala:858)
at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:52)
at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:346)
at org.apache.spark.rdd.RDD.iterator(RDD.scala:310)
at org.apache.spark.sql.execution.SQLExecutionRDD.compute(SQLExecutionRDD.scala:55)
at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:346)
at org.apache.spark.rdd.RDD.iterator(RDD.scala:310)
at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:52)
at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:346)
at org.apache.spark.rdd.RDD.iterator(RDD.scala:310)
at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90)
at org.apache.spark.scheduler.Task.run(Task.scala:123)
at org.apache.spark.executor.Executor$TaskRunner$$anonfun$10.apply(Executor.scala:408)
at org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1360)
at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:414)
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
at java.lang.Thread.run(Thread.java:748)
20/10/16 13:26:20 ERROR TaskSetManager: Task 1 in stage 6.0 failed 1 times; aborting job
71%|███████ | 17/24 [00:37<00:15, 2.22s/it]
============================================================================ slowest durations ============================================================================
116.96s call tests/unit/test_notebooks_pyspark.py::test_spark_tuning
108.13s call tests/unit/test_notebooks_pyspark.py::test_als_deep_dive_runs
45.63s call tests/unit/test_notebooks_pyspark.py::test_als_pyspark_runs
39.47s call tests/unit/test_notebooks_pyspark.py::test_evaluation_runs
37.73s call tests/unit/test_notebooks_pyspark.py::test_mmlspark_lightgbm_criteo_runs
28.27s call tests/unit/test_notebooks_pyspark.py::test_data_split_runs
(12 durations < 0.005s hidden. Use -vv to show these durations.)
========================================================================= short test summary info =========================================================================
FAILED tests/unit/test_notebooks_pyspark.py::test_mmlspark_lightgbm_criteo_runs - papermill.exceptions.PapermillExecutionError:
================================================== 1 failed, 5 passed, 212 deselected, 106 warnings in 380.60s (0:06:20) ==================================================
The solution proposed by @gramhagen was to remove the following files from the DSVM path: /dsvm/tools/spark/current/jars
Azure_mmlspark-0.12.jar
com.microsoft.cntk_cntk-2.4.jar
com.microsoft.ml.lightgbm_lightgbmlib-2.0.120.jar
Banibrata De is looking into removing these files from the default DSVM.
conda activate reco_base
pytest tests/smoke -m "smoke and not spark and not gpu" --durations 0 --disable-warnings
============================================================ test session starts =============================================================
platform linux -- Python 3.6.11, pytest-6.1.1, py-1.9.0, pluggy-0.13.1
rootdir: /home/recocat/notebooks/repos/recommenders
collected 36 items / 22 deselected / 14 selected
tests/smoke/test_criteo.py ... [ 21%]
tests/smoke/test_mind.py FF [ 35%]
tests/smoke/test_movielens.py ... [ 57%]
tests/smoke/test_notebooks_python.py ...... [100%]
================================================================== FAILURES ==================================================================
_____________________________________________________________ test_download_mind _____________________________________________________________
tmp = '/tmp/pytest-of-recocat/pytest-3/tmptghqeu5n'
@pytest.mark.smoke
def test_download_mind(tmp):
train_path, valid_path = download_mind(size="small", dest_path=tmp)
statinfo = os.stat(train_path)
> assert statinfo.st_size == 54772957
E assert 52952752 == 54772957
E + where 52952752 = os.stat_result(st_mode=33204, st_ino=12129024, st_dev=2065, st_nlink=1, st_uid=1000, st_gid=1000, st_size=52952752, st_atime=1602763139, st_mtime=1602763142, st_ctime=1602763142).st_size
tests/smoke/test_mind.py:13: AssertionError
------------------------------------------------------------ Captured stderr call ------------------------------------------------------------
100%|██████████| 51.7k/51.7k [00:02<00:00, 19.0kKB/s]
100%|██████████| 30.2k/30.2k [00:01<00:00, 15.4kKB/s]
_____________________________________________________________ test_extract_mind ______________________________________________________________
tmp = '/tmp/pytest-of-recocat/pytest-3/tmppxqmq4ms'
@pytest.mark.smoke
def test_extract_mind(tmp):
train_zip, valid_zip = download_mind(size="small", dest_path=tmp)
train_path, valid_path = extract_mind(train_zip, valid_zip)
statinfo = os.stat(os.path.join(train_path, "behaviors.tsv"))
> assert statinfo.st_size == 92047111
E assert 92019716 == 92047111
E + where 92019716 = os.stat_result(st_mode=33204, st_ino=12129028, st_dev=2065, st_nlink=1, st_uid=1000, st_gid=1000, st_size=92019716, st_atime=1602763160, st_mtime=1602763161, st_ctime=1602763161).st_size
tests/smoke/test_mind.py:24: AssertionError
------------------------------------------------------------ Captured stderr call ------------------------------------------------------------
100%|██████████| 51.7k/51.7k [00:11<00:00, 4.35kKB/s]
100%|██████████| 30.2k/30.2k [00:01<00:00, 15.6kKB/s]
============================================================= slowest durations ==============================================================
455.07s call tests/smoke/test_notebooks_python.py::test_vw_deep_dive_smoke
99.00s call tests/smoke/test_notebooks_python.py::test_lightgbm_quickstart_smoke
72.84s call tests/smoke/test_notebooks_python.py::test_surprise_svd_smoke
32.47s call tests/smoke/test_notebooks_python.py::test_cornac_bpr_smoke
29.89s call tests/smoke/test_notebooks_python.py::test_baseline_deep_dive_smoke
25.16s call tests/smoke/test_notebooks_python.py::test_sar_single_node_smoke
16.26s call tests/smoke/test_mind.py::test_extract_mind
5.42s call tests/smoke/test_mind.py::test_download_mind
2.44s call tests/smoke/test_movielens.py::test_load_pandas_df[100k-100000-1682-1-Toy Story (1995)-Animation|Children's|Comedy-1995]
2.16s call tests/smoke/test_criteo.py::test_criteo_load_pandas_df
1.56s call tests/smoke/test_criteo.py::test_extract_criteo
1.37s call tests/smoke/test_criteo.py::test_download_criteo
0.41s call tests/smoke/test_movielens.py::test_load_item_df[100k-1682-1-Toy Story (1995)-Animation|Children's|Comedy-1995]
0.37s call tests/smoke/test_movielens.py::test_download_and_extract_movielens[100k]
0.01s teardown tests/smoke/test_mind.py::test_download_mind
(27 durations < 0.005s hidden. Use -vv to show these durations.)
========================================================== short test summary info ===========================================================
FAILED tests/smoke/test_mind.py::test_download_mind - assert 52952752 == 54772957
FAILED tests/smoke/test_mind.py::test_extract_mind - assert 92019716 == 92047111
==================================== 2 failed, 12 passed, 22 deselected, 73 warnings in 748.08s (0:12:28) ====================================
fixed with https://github.com/microsoft/recommenders/pull/1218
conda activate reco_gpu
pytest tests/smoke -m "smoke and not spark and gpu" --durations 0 --disable-warnings
============================================================ test session starts =============================================================
platform linux -- Python 3.6.11, pytest-6.1.1, py-1.9.0, pluggy-0.13.1
rootdir: /home/recocat/notebooks/repos/recommenders
collected 36 items / 18 deselected / 18 selected
tests/smoke/test_deeprec_model.py .... [ 22%]
tests/smoke/test_newsrec_model.py .... [ 44%]
tests/smoke/test_notebooks_gpu.py ......F.FF [100%]
================================================================== FAILURES ==================================================================
______________________________________________________________ test_naml_smoke _______________________________________________________________
notebooks = {'als_deep_dive': '/home/recocat/notebooks/repos/recommenders/examples/02_model_collaborative_filtering/als_deep_dive....'/home/recocat/notebooks/repos/recommenders/examples/02_model_collaborative_filtering/cornac_bpr_deep_dive.ipynb', ...}
@pytest.mark.smoke
@pytest.mark.gpu
def test_naml_smoke(notebooks):
notebook_path = notebooks["naml_quickstart"]
pm.execute_notebook(
notebook_path,
OUTPUT_NOTEBOOK,
kernel_name=KERNEL_NAME,
> parameters=dict(epochs=1, seed=42, MIND_type="demo"),
)
tests/smoke/test_notebooks_gpu.py:143:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
/anaconda/envs/reco_gpu/lib/python3.6/site-packages/papermill/execute.py:100: in execute_notebook
raise_for_execution_errors(nb, output_path)
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
nb = {'cells': [{'cell_type': 'code', 'metadata': {'inputHidden': True, 'hide_input': True}, 'execution_count': None, 'sour...nd_time': '2020-10-15T12:33:13.505844', 'duration': 15.343943, 'exception': True}}, 'nbformat': 4, 'nbformat_minor': 4}
output_path = 'output.ipynb'
def raise_for_execution_errors(nb, output_path):
"""Assigned parameters into the appropriate place in the input notebook
Parameters
----------
nb : NotebookNode
Executable notebook object
output_path : str
Path to write executed notebook
"""
error = None
for cell in nb.cells:
if cell.get("outputs") is None:
continue
for output in cell.outputs:
if output.output_type == "error":
error = PapermillExecutionError(
exec_count=cell.execution_count,
source=cell.source,
ename=output.ename,
evalue=output.evalue,
traceback=output.traceback,
)
break
if error:
# Write notebook back out with the Error Message at the top of the Notebook.
error_msg = ERROR_MESSAGE_TEMPLATE % str(error.exec_count)
error_msg_cell = nbformat.v4.new_code_cell(
source="%%html\n" + error_msg,
outputs=[
nbformat.v4.new_output(output_type="display_data", data={"text/html": error_msg})
],
metadata={"inputHidden": True, "hide_input": True},
)
nb.cells = [error_msg_cell] + nb.cells
write_ipynb(nb, output_path)
> raise error
E papermill.exceptions.PapermillExecutionError:
E ---------------------------------------------------------------------------
E Exception encountered at "In [8]":
E ---------------------------------------------------------------------------
E UnknownError Traceback (most recent call last)
E <ipython-input-8-a91b7bddd47e> in <module>
E ----> 1 print(model.run_eval(valid_news_file, valid_behaviors_file))
E
E ~/notebooks/repos/recommenders/reco_utils/recommender/newsrec/models/base_model.py in run_eval(self, news_filename, behaviors_file)
E 309 if self.support_quick_scoring:
E 310 _, group_labels, group_preds = self.run_fast_eval(
E --> 311 news_filename, behaviors_file
E 312 )
E 313 else:
E
E ~/notebooks/repos/recommenders/reco_utils/recommender/newsrec/models/base_model.py in run_fast_eval(self, news_filename, behaviors_file)
E 381
E 382 def run_fast_eval(self, news_filename, behaviors_file):
E --> 383 news_vecs = self.run_news(news_filename)
E 384 user_vecs = self.run_user(news_filename, behaviors_file)
E 385
E
E ~/notebooks/repos/recommenders/reco_utils/recommender/newsrec/models/base_model.py in run_news(self, news_filename)
E 356 self.test_iterator.load_news_from_file(news_filename)
E 357 ):
E --> 358 news_index, news_vec = self.news(batch_data_input)
E 359 news_indexes.extend(np.reshape(news_index, -1))
E 360 news_vecs.extend(news_vec)
E
E ~/notebooks/repos/recommenders/reco_utils/recommender/newsrec/models/base_model.py in news(self, batch_news_input)
E 327 def news(self, batch_news_input):
E 328 news_input = self._get_news_feature_from_iter(batch_news_input)
E --> 329 news_vec = self.newsencoder.predict_on_batch(news_input)
E 330 news_index = batch_news_input["news_index_batch"]
E 331
E
E /anaconda/envs/reco_gpu/lib/python3.6/site-packages/tensorflow_core/python/keras/engine/training.py in predict_on_batch(self, x)
E 1154
E 1155 self._make_predict_function()
E -> 1156 outputs = self.predict_function(inputs)
E 1157
E 1158 if len(outputs) == 1:
E
E /anaconda/envs/reco_gpu/lib/python3.6/site-packages/tensorflow_core/python/keras/backend.py in __call__(self, inputs)
E 3474
E 3475 fetched = self._callable_fn(*array_vals,
E -> 3476 run_metadata=self.run_metadata)
E 3477 self._call_fetch_callbacks(fetched[-len(self._fetches):])
E 3478 output_structure = nest.pack_sequence_as(
E
E /anaconda/envs/reco_gpu/lib/python3.6/site-packages/tensorflow_core/python/client/session.py in __call__(self, *args, **kwargs)
E 1470 ret = tf_session.TF_SessionRunCallable(self._session._session,
E 1471 self._handle, args,
E -> 1472 run_metadata_ptr)
E 1473 if run_metadata:
E 1474 proto_data = tf_session.TF_GetBuffer(run_metadata_ptr)
E
E UnknownError: 2 root error(s) found.
E (0) Unknown: Failed to get convolution algorithm. This is probably because cuDNN failed to initialize, so try looking to see if a warning log message was printed above.
E [[{{node title_encoder/conv1d/conv1d}}]]
E [[att_layer2_2/Sum_1/_83]]
E (1) Unknown: Failed to get convolution algorithm. This is probably because cuDNN failed to initialize, so try looking to see if a warning log message was printed above.
E [[{{node title_encoder/conv1d/conv1d}}]]
E 0 successful operations.
E 0 derived errors ignored.
/anaconda/envs/reco_gpu/lib/python3.6/site-packages/papermill/execute.py:248: PapermillExecutionError
------------------------------------------------------------ Captured stderr call ------------------------------------------------------------
46%|████▌ | 11/24 [00:08<00:11, 1.17it/s]2020-10-15 12:33:06.874916: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libcuda.so.1
2020-10-15 12:33:06.891299: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1639] Found device 0 with properties:
name: Tesla V100-PCIE-16GB major: 7 minor: 0 memoryClockRate(GHz): 1.38
pciBusID: 0001:00:00.0
2020-10-15 12:33:06.891510: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libcudart.so.10.0
2020-10-15 12:33:06.892571: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libcublas.so.10.0
2020-10-15 12:33:06.893556: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libcufft.so.10.0
2020-10-15 12:33:06.893851: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libcurand.so.10.0
2020-10-15 12:33:06.895087: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libcusolver.so.10.0
2020-10-15 12:33:06.896040: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libcusparse.so.10.0
2020-10-15 12:33:06.899551: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libcudnn.so.7
2020-10-15 12:33:06.900711: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1767] Adding visible gpu devices: 0
2020-10-15 12:33:06.900999: I tensorflow/core/platform/cpu_feature_guard.cc:142] Your CPU supports instructions that this TensorFlow binary was not compiled to use: AVX2 FMA
2020-10-15 12:33:06.907356: I tensorflow/core/platform/profile_utils/cpu_utils.cc:94] CPU Frequency: 2593990000 Hz
2020-10-15 12:33:06.907710: I tensorflow/compiler/xla/service/service.cc:168] XLA service 0x558319d5ddb0 initialized for platform Host (this does not guarantee that XLA will be used). Devices:
2020-10-15 12:33:06.907735: I tensorflow/compiler/xla/service/service.cc:176] StreamExecutor device (0): Host, Default Version
2020-10-15 12:33:07.197335: I tensorflow/compiler/xla/service/service.cc:168] XLA service 0x558319d10600 initialized for platform CUDA (this does not guarantee that XLA will be used). Devices:
2020-10-15 12:33:07.197380: I tensorflow/compiler/xla/service/service.cc:176] StreamExecutor device (0): Tesla V100-PCIE-16GB, Compute Capability 7.0
2020-10-15 12:33:07.198130: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1639] Found device 0 with properties:
name: Tesla V100-PCIE-16GB major: 7 minor: 0 memoryClockRate(GHz): 1.38
pciBusID: 0001:00:00.0
2020-10-15 12:33:07.198220: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libcudart.so.10.0
2020-10-15 12:33:07.198254: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libcublas.so.10.0
2020-10-15 12:33:07.198284: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libcufft.so.10.0
2020-10-15 12:33:07.198313: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libcurand.so.10.0
2020-10-15 12:33:07.198343: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libcusolver.so.10.0
2020-10-15 12:33:07.198372: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libcusparse.so.10.0
2020-10-15 12:33:07.198407: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libcudnn.so.7
2020-10-15 12:33:07.199505: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1767] Adding visible gpu devices: 0
2020-10-15 12:33:07.199581: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libcudart.so.10.0
2020-10-15 12:33:07.201007: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1180] Device interconnect StreamExecutor with strength 1 edge matrix:
2020-10-15 12:33:07.201032: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1186] 0
2020-10-15 12:33:07.201041: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1199] 0: N
2020-10-15 12:33:07.202253: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1325] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:0 with 6479 MB memory) -> physical GPU (device: 0, name: Tesla V100-PCIE-16GB, pci bus id: 0001:00:00.0, compute capability: 7.0)
58%|█████▊ | 14/24 [00:11<00:09, 1.05it/s]2020-10-15 12:33:11.505509: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libcublas.so.10.0
2020-10-15 12:33:11.698353: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libcudnn.so.7
2020-10-15 12:33:12.026697: E tensorflow/stream_executor/cuda/cuda_dnn.cc:329] Could not create cudnn handle: CUDNN_STATUS_INTERNAL_ERROR
2020-10-15 12:33:12.029884: E tensorflow/stream_executor/cuda/cuda_dnn.cc:329] Could not create cudnn handle: CUDNN_STATUS_INTERNAL_ERROR
2020-10-15 12:33:12.035251: E tensorflow/stream_executor/cuda/cuda_dnn.cc:329] Could not create cudnn handle: CUDNN_STATUS_INTERNAL_ERROR
2020-10-15 12:33:12.038791: E tensorflow/stream_executor/cuda/cuda_dnn.cc:329] Could not create cudnn handle: CUDNN_STATUS_INTERNAL_ERROR
62%|██████▎ | 15/24 [00:15<00:09, 1.02s/it]
_______________________________________________________________ test_npa_smoke _______________________________________________________________
notebooks = {'als_deep_dive': '/home/recocat/notebooks/repos/recommenders/examples/02_model_collaborative_filtering/als_deep_dive....'/home/recocat/notebooks/repos/recommenders/examples/02_model_collaborative_filtering/cornac_bpr_deep_dive.ipynb', ...}
@pytest.mark.smoke
@pytest.mark.gpu
def test_npa_smoke(notebooks):
notebook_path = notebooks["npa_quickstart"]
pm.execute_notebook(
notebook_path,
OUTPUT_NOTEBOOK,
kernel_name=KERNEL_NAME,
> parameters=dict(epochs=1, seed=42, MIND_type="demo"),
)
tests/smoke/test_notebooks_gpu.py:179:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
/anaconda/envs/reco_gpu/lib/python3.6/site-packages/papermill/execute.py:100: in execute_notebook
raise_for_execution_errors(nb, output_path)
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
nb = {'cells': [{'cell_type': 'code', 'metadata': {'inputHidden': True, 'hide_input': True}, 'execution_count': None, 'sour...nd_time': '2020-10-15T12:35:18.677146', 'duration': 13.536445, 'exception': True}}, 'nbformat': 4, 'nbformat_minor': 4}
output_path = 'output.ipynb'
def raise_for_execution_errors(nb, output_path):
"""Assigned parameters into the appropriate place in the input notebook
Parameters
----------
nb : NotebookNode
Executable notebook object
output_path : str
Path to write executed notebook
"""
error = None
for cell in nb.cells:
if cell.get("outputs") is None:
continue
for output in cell.outputs:
if output.output_type == "error":
error = PapermillExecutionError(
exec_count=cell.execution_count,
source=cell.source,
ename=output.ename,
evalue=output.evalue,
traceback=output.traceback,
)
break
if error:
# Write notebook back out with the Error Message at the top of the Notebook.
error_msg = ERROR_MESSAGE_TEMPLATE % str(error.exec_count)
error_msg_cell = nbformat.v4.new_code_cell(
source="%%html\n" + error_msg,
outputs=[
nbformat.v4.new_output(output_type="display_data", data={"text/html": error_msg})
],
metadata={"inputHidden": True, "hide_input": True},
)
nb.cells = [error_msg_cell] + nb.cells
write_ipynb(nb, output_path)
> raise error
E papermill.exceptions.PapermillExecutionError:
E ---------------------------------------------------------------------------
E Exception encountered at "In [8]":
E ---------------------------------------------------------------------------
E UnknownError Traceback (most recent call last)
E <ipython-input-8-a91b7bddd47e> in <module>
E ----> 1 print(model.run_eval(valid_news_file, valid_behaviors_file))
E
E ~/notebooks/repos/recommenders/reco_utils/recommender/newsrec/models/base_model.py in run_eval(self, news_filename, behaviors_file)
E 313 else:
E 314 _, group_labels, group_preds = self.run_slow_eval(
E --> 315 news_filename, behaviors_file
E 316 )
E 317 res = cal_metric(group_labels, group_preds, self.hparams.metrics)
E
E ~/notebooks/repos/recommenders/reco_utils/recommender/newsrec/models/base_model.py in run_slow_eval(self, news_filename, behaviors_file)
E 370 self.test_iterator.load_data_from_file(news_filename, behaviors_file)
E 371 ):
E --> 372 step_pred, step_labels, step_imp_index = self.eval(batch_data_input)
E 373 preds.extend(np.reshape(step_pred, -1))
E 374 labels.extend(np.reshape(step_labels, -1))
E
E ~/notebooks/repos/recommenders/reco_utils/recommender/newsrec/models/base_model.py in eval(self, eval_batch_data)
E 159 imp_index = eval_batch_data["impression_index_batch"]
E 160
E --> 161 pred_rslt = self.scorer.predict_on_batch(eval_input)
E 162
E 163 return pred_rslt, eval_label, imp_index
E
E /anaconda/envs/reco_gpu/lib/python3.6/site-packages/tensorflow_core/python/keras/engine/training.py in predict_on_batch(self, x)
E 1154
E 1155 self._make_predict_function()
E -> 1156 outputs = self.predict_function(inputs)
E 1157
E 1158 if len(outputs) == 1:
E
E /anaconda/envs/reco_gpu/lib/python3.6/site-packages/tensorflow_core/python/keras/backend.py in __call__(self, inputs)
E 3474
E 3475 fetched = self._callable_fn(*array_vals,
E -> 3476 run_metadata=self.run_metadata)
E 3477 self._call_fetch_callbacks(fetched[-len(self._fetches):])
E 3478 output_structure = nest.pack_sequence_as(
E
E /anaconda/envs/reco_gpu/lib/python3.6/site-packages/tensorflow_core/python/client/session.py in __call__(self, *args, **kwargs)
E 1470 ret = tf_session.TF_SessionRunCallable(self._session._session,
E 1471 self._handle, args,
E -> 1472 run_metadata_ptr)
E 1473 if run_metadata:
E 1474 proto_data = tf_session.TF_GetBuffer(run_metadata_ptr)
E
E UnknownError: 2 root error(s) found.
E (0) Unknown: Failed to get convolution algorithm. This is probably because cuDNN failed to initialize, so try looking to see if a warning log message was printed above.
E [[{{node news_encoder/conv1d/conv1d}}]]
E [[activation_3/Sigmoid/_75]]
E (1) Unknown: Failed to get convolution algorithm. This is probably because cuDNN failed to initialize, so try looking to see if a warning log message was printed above.
E [[{{node news_encoder/conv1d/conv1d}}]]
E 0 successful operations.
E 0 derived errors ignored.
/anaconda/envs/reco_gpu/lib/python3.6/site-packages/papermill/execute.py:248: PapermillExecutionError
------------------------------------------------------------ Captured stderr call ------------------------------------------------------------
46%|████▌ | 11/24 [00:08<00:11, 1.18it/s]2020-10-15 12:35:13.744292: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libcuda.so.1
2020-10-15 12:35:13.760490: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1639] Found device 0 with properties:
name: Tesla V100-PCIE-16GB major: 7 minor: 0 memoryClockRate(GHz): 1.38
pciBusID: 0001:00:00.0
2020-10-15 12:35:13.760697: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libcudart.so.10.0
2020-10-15 12:35:13.761749: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libcublas.so.10.0
2020-10-15 12:35:13.762736: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libcufft.so.10.0
2020-10-15 12:35:13.763030: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libcurand.so.10.0
2020-10-15 12:35:13.764230: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libcusolver.so.10.0
2020-10-15 12:35:13.765364: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libcusparse.so.10.0
2020-10-15 12:35:13.768910: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libcudnn.so.7
2020-10-15 12:35:13.770080: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1767] Adding visible gpu devices: 0
2020-10-15 12:35:13.770381: I tensorflow/core/platform/cpu_feature_guard.cc:142] Your CPU supports instructions that this TensorFlow binary was not compiled to use: AVX2 FMA
2020-10-15 12:35:13.776253: I tensorflow/core/platform/profile_utils/cpu_utils.cc:94] CPU Frequency: 2593990000 Hz
2020-10-15 12:35:13.777167: I tensorflow/compiler/xla/service/service.cc:168] XLA service 0x560f78c88a50 initialized for platform Host (this does not guarantee that XLA will be used). Devices:
2020-10-15 12:35:13.777191: I tensorflow/compiler/xla/service/service.cc:176] StreamExecutor device (0): Host, Default Version
2020-10-15 12:35:14.057428: I tensorflow/compiler/xla/service/service.cc:168] XLA service 0x560f78d41360 initialized for platform CUDA (this does not guarantee that XLA will be used). Devices:
2020-10-15 12:35:14.057474: I tensorflow/compiler/xla/service/service.cc:176] StreamExecutor device (0): Tesla V100-PCIE-16GB, Compute Capability 7.0
2020-10-15 12:35:14.058231: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1639] Found device 0 with properties:
name: Tesla V100-PCIE-16GB major: 7 minor: 0 memoryClockRate(GHz): 1.38
pciBusID: 0001:00:00.0
2020-10-15 12:35:14.058305: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libcudart.so.10.0
2020-10-15 12:35:14.058335: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libcublas.so.10.0
2020-10-15 12:35:14.058361: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libcufft.so.10.0
2020-10-15 12:35:14.058386: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libcurand.so.10.0
2020-10-15 12:35:14.058412: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libcusolver.so.10.0
2020-10-15 12:35:14.058436: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libcusparse.so.10.0
2020-10-15 12:35:14.058463: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libcudnn.so.7
2020-10-15 12:35:14.059560: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1767] Adding visible gpu devices: 0
2020-10-15 12:35:14.059634: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libcudart.so.10.0
2020-10-15 12:35:14.060990: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1180] Device interconnect StreamExecutor with strength 1 edge matrix:
2020-10-15 12:35:14.061011: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1186] 0
2020-10-15 12:35:14.061021: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1199] 0: N
2020-10-15 12:35:14.062212: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1325] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:0 with 6479 MB memory) -> physical GPU (device: 0, name: Tesla V100-PCIE-16GB, pci bus id: 0001:00:00.0, compute capability: 7.0)
58%|█████▊ | 14/24 [00:10<00:08, 1.22it/s]2020-10-15 12:35:16.687265: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libcublas.so.10.0
2020-10-15 12:35:16.874472: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libcudnn.so.7
2020-10-15 12:35:17.199956: E tensorflow/stream_executor/cuda/cuda_dnn.cc:329] Could not create cudnn handle: CUDNN_STATUS_INTERNAL_ERROR
2020-10-15 12:35:17.208697: E tensorflow/stream_executor/cuda/cuda_dnn.cc:329] Could not create cudnn handle: CUDNN_STATUS_INTERNAL_ERROR
2020-10-15 12:35:17.215533: E tensorflow/stream_executor/cuda/cuda_dnn.cc:329] Could not create cudnn handle: CUDNN_STATUS_INTERNAL_ERROR
2020-10-15 12:35:17.220079: E tensorflow/stream_executor/cuda/cuda_dnn.cc:329] Could not create cudnn handle: CUDNN_STATUS_INTERNAL_ERROR
62%|██████▎ | 15/24 [00:13<00:08, 1.11it/s]
______________________________________________________________ test_lstur_smoke ______________________________________________________________
notebooks = {'als_deep_dive': '/home/recocat/notebooks/repos/recommenders/examples/02_model_collaborative_filtering/als_deep_dive....'/home/recocat/notebooks/repos/recommenders/examples/02_model_collaborative_filtering/cornac_bpr_deep_dive.ipynb', ...}
@pytest.mark.smoke
@pytest.mark.gpu
def test_lstur_smoke(notebooks):
notebook_path = notebooks["lstur_quickstart"]
pm.execute_notebook(
notebook_path,
OUTPUT_NOTEBOOK,
kernel_name=KERNEL_NAME,
> parameters=dict(epochs=1, seed=40, MIND_type="demo"),
)
tests/smoke/test_notebooks_gpu.py:197:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
/anaconda/envs/reco_gpu/lib/python3.6/site-packages/papermill/execute.py:100: in execute_notebook
raise_for_execution_errors(nb, output_path)
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
nb = {'cells': [{'cell_type': 'code', 'metadata': {'inputHidden': True, 'hide_input': True}, 'execution_count': None, 'sour...end_time': '2020-10-15T12:35:32.165684', 'duration': 13.42153, 'exception': True}}, 'nbformat': 4, 'nbformat_minor': 4}
output_path = 'output.ipynb'
def raise_for_execution_errors(nb, output_path):
"""Assigned parameters into the appropriate place in the input notebook
Parameters
----------
nb : NotebookNode
Executable notebook object
output_path : str
Path to write executed notebook
"""
error = None
for cell in nb.cells:
if cell.get("outputs") is None:
continue
for output in cell.outputs:
if output.output_type == "error":
error = PapermillExecutionError(
exec_count=cell.execution_count,
source=cell.source,
ename=output.ename,
evalue=output.evalue,
traceback=output.traceback,
)
break
if error:
# Write notebook back out with the Error Message at the top of the Notebook.
error_msg = ERROR_MESSAGE_TEMPLATE % str(error.exec_count)
error_msg_cell = nbformat.v4.new_code_cell(
source="%%html\n" + error_msg,
outputs=[
nbformat.v4.new_output(output_type="display_data", data={"text/html": error_msg})
],
metadata={"inputHidden": True, "hide_input": True},
)
nb.cells = [error_msg_cell] + nb.cells
write_ipynb(nb, output_path)
> raise error
E papermill.exceptions.PapermillExecutionError:
E ---------------------------------------------------------------------------
E Exception encountered at "In [8]":
E ---------------------------------------------------------------------------
E UnknownError Traceback (most recent call last)
E <ipython-input-8-a91b7bddd47e> in <module>
E ----> 1 print(model.run_eval(valid_news_file, valid_behaviors_file))
E
E ~/notebooks/repos/recommenders/reco_utils/recommender/newsrec/models/base_model.py in run_eval(self, news_filename, behaviors_file)
E 309 if self.support_quick_scoring:
E 310 _, group_labels, group_preds = self.run_fast_eval(
E --> 311 news_filename, behaviors_file
E 312 )
E 313 else:
E
E ~/notebooks/repos/recommenders/reco_utils/recommender/newsrec/models/base_model.py in run_fast_eval(self, news_filename, behaviors_file)
E 381
E 382 def run_fast_eval(self, news_filename, behaviors_file):
E --> 383 news_vecs = self.run_news(news_filename)
E 384 user_vecs = self.run_user(news_filename, behaviors_file)
E 385
E
E ~/notebooks/repos/recommenders/reco_utils/recommender/newsrec/models/base_model.py in run_news(self, news_filename)
E 356 self.test_iterator.load_news_from_file(news_filename)
E 357 ):
E --> 358 news_index, news_vec = self.news(batch_data_input)
E 359 news_indexes.extend(np.reshape(news_index, -1))
E 360 news_vecs.extend(news_vec)
E
E ~/notebooks/repos/recommenders/reco_utils/recommender/newsrec/models/base_model.py in news(self, batch_news_input)
E 327 def news(self, batch_news_input):
E 328 news_input = self._get_news_feature_from_iter(batch_news_input)
E --> 329 news_vec = self.newsencoder.predict_on_batch(news_input)
E 330 news_index = batch_news_input["news_index_batch"]
E 331
E
E /anaconda/envs/reco_gpu/lib/python3.6/site-packages/tensorflow_core/python/keras/engine/training.py in predict_on_batch(self, x)
E 1154
E 1155 self._make_predict_function()
E -> 1156 outputs = self.predict_function(inputs)
E 1157
E 1158 if len(outputs) == 1:
E
E /anaconda/envs/reco_gpu/lib/python3.6/site-packages/tensorflow_core/python/keras/backend.py in __call__(self, inputs)
E 3474
E 3475 fetched = self._callable_fn(*array_vals,
E -> 3476 run_metadata=self.run_metadata)
E 3477 self._call_fetch_callbacks(fetched[-len(self._fetches):])
E 3478 output_structure = nest.pack_sequence_as(
E
E /anaconda/envs/reco_gpu/lib/python3.6/site-packages/tensorflow_core/python/client/session.py in __call__(self, *args, **kwargs)
E 1470 ret = tf_session.TF_SessionRunCallable(self._session._session,
E 1471 self._handle, args,
E -> 1472 run_metadata_ptr)
E 1473 if run_metadata:
E 1474 proto_data = tf_session.TF_GetBuffer(run_metadata_ptr)
E
E UnknownError: 2 root error(s) found.
E (0) Unknown: Failed to get convolution algorithm. This is probably because cuDNN failed to initialize, so try looking to see if a warning log message was printed above.
E [[{{node conv1d/conv1d}}]]
E [[att_layer2/Sum_1/_33]]
E (1) Unknown: Failed to get convolution algorithm. This is probably because cuDNN failed to initialize, so try looking to see if a warning log message was printed above.
E [[{{node conv1d/conv1d}}]]
E 0 successful operations.
E 0 derived errors ignored.
/anaconda/envs/reco_gpu/lib/python3.6/site-packages/papermill/execute.py:248: PapermillExecutionError
------------------------------------------------------------ Captured stderr call ------------------------------------------------------------
46%|████▌ | 11/24 [00:08<00:10, 1.21it/s]2020-10-15 12:35:27.139587: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libcuda.so.1
2020-10-15 12:35:27.156090: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1639] Found device 0 with properties:
name: Tesla V100-PCIE-16GB major: 7 minor: 0 memoryClockRate(GHz): 1.38
pciBusID: 0001:00:00.0
2020-10-15 12:35:27.156303: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libcudart.so.10.0
2020-10-15 12:35:27.157354: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libcublas.so.10.0
2020-10-15 12:35:27.158361: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libcufft.so.10.0
2020-10-15 12:35:27.158653: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libcurand.so.10.0
2020-10-15 12:35:27.159866: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libcusolver.so.10.0
2020-10-15 12:35:27.160881: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libcusparse.so.10.0
2020-10-15 12:35:27.164493: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libcudnn.so.7
2020-10-15 12:35:27.165670: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1767] Adding visible gpu devices: 0
2020-10-15 12:35:27.165951: I tensorflow/core/platform/cpu_feature_guard.cc:142] Your CPU supports instructions that this TensorFlow binary was not compiled to use: AVX2 FMA
2020-10-15 12:35:27.172189: I tensorflow/core/platform/profile_utils/cpu_utils.cc:94] CPU Frequency: 2593990000 Hz
2020-10-15 12:35:27.173083: I tensorflow/compiler/xla/service/service.cc:168] XLA service 0x560d3394e560 initialized for platform Host (this does not guarantee that XLA will be used). Devices:
2020-10-15 12:35:27.173107: I tensorflow/compiler/xla/service/service.cc:176] StreamExecutor device (0): Host, Default Version
2020-10-15 12:35:27.453397: I tensorflow/compiler/xla/service/service.cc:168] XLA service 0x560d339105d0 initialized for platform CUDA (this does not guarantee that XLA will be used). Devices:
2020-10-15 12:35:27.453442: I tensorflow/compiler/xla/service/service.cc:176] StreamExecutor device (0): Tesla V100-PCIE-16GB, Compute Capability 7.0
2020-10-15 12:35:27.454183: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1639] Found device 0 with properties:
name: Tesla V100-PCIE-16GB major: 7 minor: 0 memoryClockRate(GHz): 1.38
pciBusID: 0001:00:00.0
2020-10-15 12:35:27.454259: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libcudart.so.10.0
2020-10-15 12:35:27.454289: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libcublas.so.10.0
2020-10-15 12:35:27.454314: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libcufft.so.10.0
2020-10-15 12:35:27.454340: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libcurand.so.10.0
2020-10-15 12:35:27.454365: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libcusolver.so.10.0
2020-10-15 12:35:27.454390: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libcusparse.so.10.0
2020-10-15 12:35:27.454416: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libcudnn.so.7
2020-10-15 12:35:27.455506: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1767] Adding visible gpu devices: 0
2020-10-15 12:35:27.455575: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libcudart.so.10.0
2020-10-15 12:35:27.456947: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1180] Device interconnect StreamExecutor with strength 1 edge matrix:
2020-10-15 12:35:27.456969: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1186] 0
2020-10-15 12:35:27.456978: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1199] 0: N
2020-10-15 12:35:27.458177: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1325] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:0 with 6479 MB memory) -> physical GPU (device: 0, name: Tesla V100-PCIE-16GB, pci bus id: 0001:00:00.0, compute capability: 7.0)
58%|█████▊ | 14/24 [00:10<00:08, 1.23it/s]2020-10-15 12:35:30.157878: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libcublas.so.10.0
2020-10-15 12:35:30.348381: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libcudnn.so.7
2020-10-15 12:35:30.700098: E tensorflow/stream_executor/cuda/cuda_dnn.cc:329] Could not create cudnn handle: CUDNN_STATUS_INTERNAL_ERROR
2020-10-15 12:35:30.702039: E tensorflow/stream_executor/cuda/cuda_dnn.cc:329] Could not create cudnn handle: CUDNN_STATUS_INTERNAL_ERROR
62%|██████▎ | 15/24 [00:13<00:08, 1.12it/s]
============================================================= slowest durations ==============================================================
217.13s call tests/smoke/test_newsrec_model.py::test_model_naml
141.18s call tests/smoke/test_newsrec_model.py::test_model_lstur
140.12s call tests/smoke/test_newsrec_model.py::test_model_npa
111.17s call tests/smoke/test_notebooks_gpu.py::test_nrms_smoke
102.53s call tests/smoke/test_deeprec_model.py::test_model_slirec
100.84s call tests/smoke/test_notebooks_gpu.py::test_ncf_deep_dive_smoke
97.35s call tests/smoke/test_notebooks_gpu.py::test_wide_deep_smoke
85.72s call tests/smoke/test_newsrec_model.py::test_model_nrms
64.48s call tests/smoke/test_notebooks_gpu.py::test_xdeepfm_smoke
36.15s call tests/smoke/test_notebooks_gpu.py::test_ncf_smoke
32.13s call tests/smoke/test_notebooks_gpu.py::test_fastai_smoke
19.41s call tests/smoke/test_deeprec_model.py::test_model_dkn
15.40s call tests/smoke/test_notebooks_gpu.py::test_naml_smoke
13.59s call tests/smoke/test_notebooks_gpu.py::test_npa_smoke
13.48s call tests/smoke/test_notebooks_gpu.py::test_lstur_smoke
8.18s call tests/smoke/test_deeprec_model.py::test_model_lightgcn
3.89s call tests/smoke/test_deeprec_model.py::test_model_xdeepfm
0.03s teardown tests/smoke/test_newsrec_model.py::test_model_nrms
0.03s teardown tests/smoke/test_newsrec_model.py::test_model_lstur
0.03s teardown tests/smoke/test_newsrec_model.py::test_model_naml
0.03s teardown tests/smoke/test_newsrec_model.py::test_model_npa
0.02s teardown tests/smoke/test_notebooks_gpu.py::test_wide_deep_smoke
(32 durations < 0.005s hidden. Use -vv to show these durations.)
========================================================== short test summary info ===========================================================
FAILED tests/smoke/test_notebooks_gpu.py::test_naml_smoke - papermill.exceptions.PapermillExecutionError:
FAILED tests/smoke/test_notebooks_gpu.py::test_npa_smoke - papermill.exceptions.PapermillExecutionError:
FAILED tests/smoke/test_notebooks_gpu.py::test_lstur_smoke - papermill.exceptions.PapermillExecutionError:
================================== 3 failed, 15 passed, 18 deselected, 1732 warnings in 1206.14s (0:20:06) ===================================
Fixed with https://github.com/microsoft/recommenders/pull/1226.
conda activate reco_pyspark
pytest tests/smoke -m "smoke and spark and not gpu" --durations 0 --disable-warnings
============================= test session starts ==============================
platform linux -- Python 3.6.11, pytest-6.1.1, py-1.9.0, pluggy-0.13.1
rootdir: /home/recocat/notebooks/repos/recommenders
collected 36 items / 32 deselected / 4 selected
tests/smoke/test_criteo.py . [ 25%]
tests/smoke/test_movielens.py . [ 50%]
tests/smoke/test_notebooks_pyspark.py .F [100%]
=================================== FAILURES ===================================
_____________________ test_mmlspark_lightgbm_criteo_smoke ______________________
notebooks = {'als_deep_dive': '/home/recocat/notebooks/repos/recommenders/examples/02_model_collaborative_filtering/als_deep_dive....'/home/recocat/notebooks/repos/recommenders/examples/02_model_collaborative_filtering/cornac_bpr_deep_dive.ipynb', ...}
@pytest.mark.smoke
@pytest.mark.spark
@pytest.mark.skipif(sys.platform == "win32", reason="Not implemented on Windows")
def test_mmlspark_lightgbm_criteo_smoke(notebooks):
notebook_path = notebooks["mmlspark_lightgbm_criteo"]
pm.execute_notebook(
notebook_path,
OUTPUT_NOTEBOOK,
kernel_name=KERNEL_NAME,
> parameters=dict(DATA_SIZE="sample", NUM_ITERATIONS=50, EARLY_STOPPING_ROUND=10),
)
tests/smoke/test_notebooks_pyspark.py:46:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
/anaconda/envs/reco_pyspark/lib/python3.6/site-packages/papermill/execute.py:100: in execute_notebook
raise_for_execution_errors(nb, output_path)
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
nb = {'cells': [{'cell_type': 'code', 'metadata': {'inputHidden': True, 'hide_input': True}, 'execution_count': None, 'sour...nd_time': '2020-10-16T14:20:38.712858', 'duration': 30.996296, 'exception': True}}, 'nbformat': 4, 'nbformat_minor': 2}
output_path = 'output.ipynb'
def raise_for_execution_errors(nb, output_path):
"""Assigned parameters into the appropriate place in the input notebook
Parameters
----------
nb : NotebookNode
Executable notebook object
output_path : str
Path to write executed notebook
"""
error = None
for cell in nb.cells:
if cell.get("outputs") is None:
continue
for output in cell.outputs:
if output.output_type == "error":
error = PapermillExecutionError(
exec_count=cell.execution_count,
source=cell.source,
ename=output.ename,
evalue=output.evalue,
traceback=output.traceback,
)
break
if error:
# Write notebook back out with the Error Message at the top of the Notebook.
error_msg = ERROR_MESSAGE_TEMPLATE % str(error.exec_count)
error_msg_cell = nbformat.v4.new_code_cell(
source="%%html\n" + error_msg,
outputs=[
nbformat.v4.new_output(output_type="display_data", data={"text/html": error_msg})
],
metadata={"inputHidden": True, "hide_input": True},
)
nb.cells = [error_msg_cell] + nb.cells
write_ipynb(nb, output_path)
> raise error
E papermill.exceptions.PapermillExecutionError:
E ---------------------------------------------------------------------------
E Exception encountered at "In [9]":
E ---------------------------------------------------------------------------
E Py4JJavaError Traceback (most recent call last)
E <ipython-input-9-2c2d97ba8c1c> in <module>
E ----> 1 model = lgbm.fit(train)
E
E /anaconda/envs/reco_pyspark/lib/python3.6/site-packages/pyspark/ml/base.py in fit(self, dataset, params)
E 130 return self.copy(params)._fit(dataset)
E 131 else:
E --> 132 return self._fit(dataset)
E 133 else:
E 134 raise ValueError("Params must be either a param map or a list/tuple of param maps, "
E
E /anaconda/envs/reco_pyspark/lib/python3.6/site-packages/pyspark/ml/wrapper.py in _fit(self, dataset)
E 293
E 294 def _fit(self, dataset):
E --> 295 java_model = self._fit_java(dataset)
E 296 model = self._create_model(java_model)
E 297 return self._copyValues(model)
E
E /anaconda/envs/reco_pyspark/lib/python3.6/site-packages/pyspark/ml/wrapper.py in _fit_java(self, dataset)
E 290 """
E 291 self._transfer_params_to_java()
E --> 292 return self._java_obj.fit(dataset._jdf)
E 293
E 294 def _fit(self, dataset):
E
E /anaconda/envs/reco_pyspark/lib/python3.6/site-packages/py4j/java_gateway.py in __call__(self, *args)
E 1255 answer = self.gateway_client.send_command(command)
E 1256 return_value = get_return_value(
E -> 1257 answer, self.gateway_client, self.target_id, self.name)
E 1258
E 1259 for temp_arg in temp_args:
E
E /anaconda/envs/reco_pyspark/lib/python3.6/site-packages/pyspark/sql/utils.py in deco(*a, **kw)
E 61 def deco(*a, **kw):
E 62 try:
E ---> 63 return f(*a, **kw)
E 64 except py4j.protocol.Py4JJavaError as e:
E 65 s = e.java_exception.toString()
E
E /anaconda/envs/reco_pyspark/lib/python3.6/site-packages/py4j/protocol.py in get_return_value(answer, gateway_client, target_id, name)
E 326 raise Py4JJavaError(
E 327 "An error occurred while calling {0}{1}{2}.\n".
E --> 328 format(target_id, ".", name), value)
E 329 else:
E 330 raise Py4JError(
E
E Py4JJavaError: An error occurred while calling o106.fit.
E : org.apache.spark.SparkException: Job aborted due to stage failure: Task 5 in stage 6.0 failed 1 times, most recent failure: Lost task 5.0 in stage 6.0 (TID 26, localhost, executor driver): java.lang.UnsatisfiedLinkError: com.microsoft.ml.lightgbm.lightgbmlibJNI.LGBM_NetworkFree()I
E at com.microsoft.ml.lightgbm.lightgbmlibJNI.LGBM_NetworkFree(Native Method)
E at com.microsoft.ml.lightgbm.lightgbmlib.LGBM_NetworkFree(lightgbmlib.java:209)
E at com.microsoft.ml.spark.lightgbm.TrainUtils$.trainLightGBM(TrainUtils.scala:415)
E at com.microsoft.ml.spark.lightgbm.LightGBMBase$$anonfun$6.apply(LightGBMBase.scala:85)
E at com.microsoft.ml.spark.lightgbm.LightGBMBase$$anonfun$6.apply(LightGBMBase.scala:85)
E at org.apache.spark.sql.execution.MapPartitionsExec$$anonfun$5.apply(objects.scala:188)
E at org.apache.spark.sql.execution.MapPartitionsExec$$anonfun$5.apply(objects.scala:185)
E at org.apache.spark.rdd.RDD$$anonfun$mapPartitionsInternal$1$$anonfun$apply$24.apply(RDD.scala:858)
E at org.apache.spark.rdd.RDD$$anonfun$mapPartitionsInternal$1$$anonfun$apply$24.apply(RDD.scala:858)
E at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:52)
E at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:346)
E at org.apache.spark.rdd.RDD.iterator(RDD.scala:310)
E at org.apache.spark.sql.execution.SQLExecutionRDD.compute(SQLExecutionRDD.scala:55)
E at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:346)
E at org.apache.spark.rdd.RDD.iterator(RDD.scala:310)
E at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:52)
E at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:346)
E at org.apache.spark.rdd.RDD.iterator(RDD.scala:310)
E at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90)
E at org.apache.spark.scheduler.Task.run(Task.scala:123)
E at org.apache.spark.executor.Executor$TaskRunner$$anonfun$10.apply(Executor.scala:408)
E at org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1360)
E at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:414)
E at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
E at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
E at java.lang.Thread.run(Thread.java:748)
E
E Driver stacktrace:
E at org.apache.spark.scheduler.DAGScheduler.org$apache$spark$scheduler$DAGScheduler$$failJobAndIndependentStages(DAGScheduler.scala:1891)
E at org.apache.spark.scheduler.DAGScheduler$$anonfun$abortStage$1.apply(DAGScheduler.scala:1879)
E at org.apache.spark.scheduler.DAGScheduler$$anonfun$abortStage$1.apply(DAGScheduler.scala:1878)
E at scala.collection.mutable.ResizableArray$class.foreach(ResizableArray.scala:59)
E at scala.collection.mutable.ArrayBuffer.foreach(ArrayBuffer.scala:48)
E at org.apache.spark.scheduler.DAGScheduler.abortStage(DAGScheduler.scala:1878)
E at org.apache.spark.scheduler.DAGScheduler$$anonfun$handleTaskSetFailed$1.apply(DAGScheduler.scala:927)
E at org.apache.spark.scheduler.DAGScheduler$$anonfun$handleTaskSetFailed$1.apply(DAGScheduler.scala:927)
E at scala.Option.foreach(Option.scala:257)
E at org.apache.spark.scheduler.DAGScheduler.handleTaskSetFailed(DAGScheduler.scala:927)
E at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.doOnReceive(DAGScheduler.scala:2112)
E at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:2061)
E at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:2050)
E at org.apache.spark.util.EventLoop$$anon$1.run(EventLoop.scala:49)
E at org.apache.spark.scheduler.DAGScheduler.runJob(DAGScheduler.scala:738)
E at org.apache.spark.SparkContext.runJob(SparkContext.scala:2061)
E at org.apache.spark.SparkContext.runJob(SparkContext.scala:2158)
E at org.apache.spark.rdd.RDD$$anonfun$reduce$1.apply(RDD.scala:1080)
E at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:151)
E at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:112)
E at org.apache.spark.rdd.RDD.withScope(RDD.scala:385)
E at org.apache.spark.rdd.RDD.reduce(RDD.scala:1062)
E at org.apache.spark.sql.Dataset$$anonfun$reduce$1.apply(Dataset.scala:1643)
E at org.apache.spark.sql.Dataset$$anonfun$withNewRDDExecutionId$1.apply(Dataset.scala:3355)
E at org.apache.spark.sql.execution.SQLExecution$$anonfun$withNewExecutionId$1.apply(SQLExecution.scala:80)
E at org.apache.spark.sql.execution.SQLExecution$.withSQLConfPropagated(SQLExecution.scala:127)
E at org.apache.spark.sql.execution.SQLExecution$.withNewExecutionId(SQLExecution.scala:75)
E at org.apache.spark.sql.Dataset.withNewRDDExecutionId(Dataset.scala:3351)
E at org.apache.spark.sql.Dataset.reduce(Dataset.scala:1642)
E at com.microsoft.ml.spark.lightgbm.LightGBMBase$class.innerTrain(LightGBMBase.scala:90)
E at com.microsoft.ml.spark.lightgbm.LightGBMClassifier.innerTrain(LightGBMClassifier.scala:26)
E at com.microsoft.ml.spark.lightgbm.LightGBMBase$class.train(LightGBMBase.scala:38)
E at com.microsoft.ml.spark.lightgbm.LightGBMClassifier.train(LightGBMClassifier.scala:26)
E at com.microsoft.ml.spark.lightgbm.LightGBMClassifier.train(LightGBMClassifier.scala:26)
E at org.apache.spark.ml.Predictor.fit(Predictor.scala:118)
E at org.apache.spark.ml.Predictor.fit(Predictor.scala:82)
E at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
E at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
E at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
E at java.lang.reflect.Method.invoke(Method.java:498)
E at py4j.reflection.MethodInvoker.invoke(MethodInvoker.java:244)
E at py4j.reflection.ReflectionEngine.invoke(ReflectionEngine.java:357)
E at py4j.Gateway.invoke(Gateway.java:282)
E at py4j.commands.AbstractCommand.invokeMethod(AbstractCommand.java:132)
E at py4j.commands.CallCommand.execute(CallCommand.java:79)
E at py4j.GatewayConnection.run(GatewayConnection.java:238)
E at java.lang.Thread.run(Thread.java:748)
E Caused by: java.lang.UnsatisfiedLinkError: com.microsoft.ml.lightgbm.lightgbmlibJNI.LGBM_NetworkFree()I
E at com.microsoft.ml.lightgbm.lightgbmlibJNI.LGBM_NetworkFree(Native Method)
E at com.microsoft.ml.lightgbm.lightgbmlib.LGBM_NetworkFree(lightgbmlib.java:209)
E at com.microsoft.ml.spark.lightgbm.TrainUtils$.trainLightGBM(TrainUtils.scala:415)
E at com.microsoft.ml.spark.lightgbm.LightGBMBase$$anonfun$6.apply(LightGBMBase.scala:85)
E at com.microsoft.ml.spark.lightgbm.LightGBMBase$$anonfun$6.apply(LightGBMBase.scala:85)
E at org.apache.spark.sql.execution.MapPartitionsExec$$anonfun$5.apply(objects.scala:188)
E at org.apache.spark.sql.execution.MapPartitionsExec$$anonfun$5.apply(objects.scala:185)
E at org.apache.spark.rdd.RDD$$anonfun$mapPartitionsInternal$1$$anonfun$apply$24.apply(RDD.scala:858)
E at org.apache.spark.rdd.RDD$$anonfun$mapPartitionsInternal$1$$anonfun$apply$24.apply(RDD.scala:858)
E at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:52)
E at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:346)
E at org.apache.spark.rdd.RDD.iterator(RDD.scala:310)
E at org.apache.spark.sql.execution.SQLExecutionRDD.compute(SQLExecutionRDD.scala:55)
E at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:346)
E at org.apache.spark.rdd.RDD.iterator(RDD.scala:310)
E at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:52)
E at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:346)
E at org.apache.spark.rdd.RDD.iterator(RDD.scala:310)
E at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90)
E at org.apache.spark.scheduler.Task.run(Task.scala:123)
E at org.apache.spark.executor.Executor$TaskRunner$$anonfun$10.apply(Executor.scala:408)
E at org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1360)
E at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:414)
E at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
E at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
E ... 1 more
/anaconda/envs/reco_pyspark/lib/python3.6/site-packages/papermill/execute.py:248: PapermillExecutionError
----------------------------- Captured stderr call -----------------------------
4%|▍ | 1/24 [00:00<00:16, 1.40it/s]https://mvnrepository.com/artifact added as a remote repository with the name: repo-1
Ivy Default Cache set to: /home/recocat/.ivy2/cache
The jars for the packages stored in: /home/recocat/.ivy2/jars
:: loading settings :: url = jar:file:/dsvm/tools/spark/spark-2.4.5/jars/ivy-2.4.0.jar!/org/apache/ivy/core/settings/ivysettings.xml
com.microsoft.ml.spark#mmlspark_2.11 added as a dependency
:: resolving dependencies :: org.apache.spark#spark-submit-parent-5ed69986-9758-4217-a529-78fb10828234;1.0
confs: [default]
found com.microsoft.ml.spark#mmlspark_2.11;0.18.1 in central
found org.scalactic#scalactic_2.11;3.0.5 in central
found org.scala-lang#scala-reflect;2.11.12 in central
found org.scalatest#scalatest_2.11;3.0.5 in central
found org.scala-lang.modules#scala-xml_2.11;1.0.6 in central
found io.spray#spray-json_2.11;1.3.2 in central
found com.microsoft.cntk#cntk;2.4 in central
found org.openpnp#opencv;3.2.0-1 in central
found com.jcraft#jsch;0.1.54 in central
found org.apache.httpcomponents#httpclient;4.5.6 in central
found org.apache.httpcomponents#httpcore;4.4.10 in central
found commons-logging#commons-logging;1.2 in central
found commons-codec#commons-codec;1.10 in central
found com.microsoft.ml.lightgbm#lightgbmlib;2.2.350 in central
found com.github.vowpalwabbit#vw-jni;8.7.0.2 in central
:: resolution report :: resolve 365ms :: artifacts dl 9ms
:: modules in use:
com.github.vowpalwabbit#vw-jni;8.7.0.2 from central in [default]
com.jcraft#jsch;0.1.54 from central in [default]
com.microsoft.cntk#cntk;2.4 from central in [default]
com.microsoft.ml.lightgbm#lightgbmlib;2.2.350 from central in [default]
com.microsoft.ml.spark#mmlspark_2.11;0.18.1 from central in [default]
commons-codec#commons-codec;1.10 from central in [default]
commons-logging#commons-logging;1.2 from central in [default]
io.spray#spray-json_2.11;1.3.2 from central in [default]
org.apache.httpcomponents#httpclient;4.5.6 from central in [default]
org.apache.httpcomponents#httpcore;4.4.10 from central in [default]
org.openpnp#opencv;3.2.0-1 from central in [default]
org.scala-lang#scala-reflect;2.11.12 from central in [default]
org.scala-lang.modules#scala-xml_2.11;1.0.6 from central in [default]
org.scalactic#scalactic_2.11;3.0.5 from central in [default]
org.scalatest#scalatest_2.11;3.0.5 from central in [default]
---------------------------------------------------------------------
| | modules || artifacts |
| conf | number| search|dwnlded|evicted|| number|dwnlded|
---------------------------------------------------------------------
| default | 15 | 0 | 0 | 0 || 15 | 0 |
---------------------------------------------------------------------
:: retrieving :: org.apache.spark#spark-submit-parent-5ed69986-9758-4217-a529-78fb10828234
confs: [default]
0 artifacts copied, 15 already retrieved (0kB/9ms)
20/10/16 14:20:11 WARN NativeCodeLoader: Unable to load native-hadoop library for your platform... using builtin-java classes where applicable
Using Spark's default log4j profile: org/apache/spark/log4j-defaults.properties
Setting default log level to "WARN".
To adjust logging level use sc.setLogLevel(newLevel). For SparkR, use setLogLevel(newLevel).
20/10/16 14:20:12 WARN Utils: Service 'SparkUI' could not bind on port 4040. Attempting port 4041.
21%|██ | 5/24 [00:05<00:16, 1.15it/s]20/10/16 14:20:16 WARN Utils: Truncated the string representation of a plan since it was too large. This behavior can be adjusted by setting 'spark.debug.maxToStringFields' in SparkEnv.conf.
[Stage 6:> (0 + 6) / 6]20/10/16 14:20:38 ERROR Executor: Exception in task 5.0 in stage 6.0 (TID 26)
java.lang.UnsatisfiedLinkError: com.microsoft.ml.lightgbm.lightgbmlibJNI.LGBM_NetworkFree()I
at com.microsoft.ml.lightgbm.lightgbmlibJNI.LGBM_NetworkFree(Native Method)
at com.microsoft.ml.lightgbm.lightgbmlib.LGBM_NetworkFree(lightgbmlib.java:209)
at com.microsoft.ml.spark.lightgbm.TrainUtils$.trainLightGBM(TrainUtils.scala:415)
at com.microsoft.ml.spark.lightgbm.LightGBMBase$$anonfun$6.apply(LightGBMBase.scala:85)
at com.microsoft.ml.spark.lightgbm.LightGBMBase$$anonfun$6.apply(LightGBMBase.scala:85)
at org.apache.spark.sql.execution.MapPartitionsExec$$anonfun$5.apply(objects.scala:188)
at org.apache.spark.sql.execution.MapPartitionsExec$$anonfun$5.apply(objects.scala:185)
at org.apache.spark.rdd.RDD$$anonfun$mapPartitionsInternal$1$$anonfun$apply$24.apply(RDD.scala:858)
at org.apache.spark.rdd.RDD$$anonfun$mapPartitionsInternal$1$$anonfun$apply$24.apply(RDD.scala:858)
at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:52)
at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:346)
at org.apache.spark.rdd.RDD.iterator(RDD.scala:310)
at org.apache.spark.sql.execution.SQLExecutionRDD.compute(SQLExecutionRDD.scala:55)
at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:346)
at org.apache.spark.rdd.RDD.iterator(RDD.scala:310)
at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:52)
at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:346)
at org.apache.spark.rdd.RDD.iterator(RDD.scala:310)
at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90)
at org.apache.spark.scheduler.Task.run(Task.scala:123)
at org.apache.spark.executor.Executor$TaskRunner$$anonfun$10.apply(Executor.scala:408)
at org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1360)
at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:414)
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
at java.lang.Thread.run(Thread.java:748)
20/10/16 14:20:38 ERROR Executor: Exception in task 0.0 in stage 6.0 (TID 21)
java.lang.UnsatisfiedLinkError: com.microsoft.ml.lightgbm.lightgbmlibJNI.LGBM_NetworkFree()I
at com.microsoft.ml.lightgbm.lightgbmlibJNI.LGBM_NetworkFree(Native Method)
at com.microsoft.ml.lightgbm.lightgbmlib.LGBM_NetworkFree(lightgbmlib.java:209)
at com.microsoft.ml.spark.lightgbm.TrainUtils$.trainLightGBM(TrainUtils.scala:415)
at com.microsoft.ml.spark.lightgbm.LightGBMBase$$anonfun$6.apply(LightGBMBase.scala:85)
at com.microsoft.ml.spark.lightgbm.LightGBMBase$$anonfun$6.apply(LightGBMBase.scala:85)
at org.apache.spark.sql.execution.MapPartitionsExec$$anonfun$5.apply(objects.scala:188)
at org.apache.spark.sql.execution.MapPartitionsExec$$anonfun$5.apply(objects.scala:185)
at org.apache.spark.rdd.RDD$$anonfun$mapPartitionsInternal$1$$anonfun$apply$24.apply(RDD.scala:858)
at org.apache.spark.rdd.RDD$$anonfun$mapPartitionsInternal$1$$anonfun$apply$24.apply(RDD.scala:858)
at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:52)
at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:346)
at org.apache.spark.rdd.RDD.iterator(RDD.scala:310)
at org.apache.spark.sql.execution.SQLExecutionRDD.compute(SQLExecutionRDD.scala:55)
at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:346)
at org.apache.spark.rdd.RDD.iterator(RDD.scala:310)
at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:52)
at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:346)
at org.apache.spark.rdd.RDD.iterator(RDD.scala:310)
at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90)
at org.apache.spark.scheduler.Task.run(Task.scala:123)
at org.apache.spark.executor.Executor$TaskRunner$$anonfun$10.apply(Executor.scala:408)
at org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1360)
at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:414)
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
at java.lang.Thread.run(Thread.java:748)
20/10/16 14:20:38 ERROR Executor: Exception in task 4.0 in stage 6.0 (TID 25)
java.lang.UnsatisfiedLinkError: com.microsoft.ml.lightgbm.lightgbmlibJNI.LGBM_NetworkFree()I
at com.microsoft.ml.lightgbm.lightgbmlibJNI.LGBM_NetworkFree(Native Method)
at com.microsoft.ml.lightgbm.lightgbmlib.LGBM_NetworkFree(lightgbmlib.java:209)
at com.microsoft.ml.spark.lightgbm.TrainUtils$.trainLightGBM(TrainUtils.scala:415)
at com.microsoft.ml.spark.lightgbm.LightGBMBase$$anonfun$6.apply(LightGBMBase.scala:85)
at com.microsoft.ml.spark.lightgbm.LightGBMBase$$anonfun$6.apply(LightGBMBase.scala:85)
at org.apache.spark.sql.execution.MapPartitionsExec$$anonfun$5.apply(objects.scala:188)
at org.apache.spark.sql.execution.MapPartitionsExec$$anonfun$5.apply(objects.scala:185)
at org.apache.spark.rdd.RDD$$anonfun$mapPartitionsInternal$1$$anonfun$apply$24.apply(RDD.scala:858)
at org.apache.spark.rdd.RDD$$anonfun$mapPartitionsInternal$1$$anonfun$apply$24.apply(RDD.scala:858)
at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:52)
at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:346)
at org.apache.spark.rdd.RDD.iterator(RDD.scala:310)
at org.apache.spark.sql.execution.SQLExecutionRDD.compute(SQLExecutionRDD.scala:55)
at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:346)
at org.apache.spark.rdd.RDD.iterator(RDD.scala:310)
at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:52)
at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:346)
at org.apache.spark.rdd.RDD.iterator(RDD.scala:310)
at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90)
at org.apache.spark.scheduler.Task.run(Task.scala:123)
at org.apache.spark.executor.Executor$TaskRunner$$anonfun$10.apply(Executor.scala:408)
at org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1360)
at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:414)
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
at java.lang.Thread.run(Thread.java:748)
20/10/16 14:20:38 ERROR Executor: Exception in task 3.0 in stage 6.0 (TID 24)
java.lang.UnsatisfiedLinkError: com.microsoft.ml.lightgbm.lightgbmlibJNI.LGBM_NetworkFree()I
at com.microsoft.ml.lightgbm.lightgbmlibJNI.LGBM_NetworkFree(Native Method)
at com.microsoft.ml.lightgbm.lightgbmlib.LGBM_NetworkFree(lightgbmlib.java:209)
at com.microsoft.ml.spark.lightgbm.TrainUtils$.trainLightGBM(TrainUtils.scala:415)
at com.microsoft.ml.spark.lightgbm.LightGBMBase$$anonfun$6.apply(LightGBMBase.scala:85)
at com.microsoft.ml.spark.lightgbm.LightGBMBase$$anonfun$6.apply(LightGBMBase.scala:85)
at org.apache.spark.sql.execution.MapPartitionsExec$$anonfun$5.apply(objects.scala:188)
at org.apache.spark.sql.execution.MapPartitionsExec$$anonfun$5.apply(objects.scala:185)
at org.apache.spark.rdd.RDD$$anonfun$mapPartitionsInternal$1$$anonfun$apply$24.apply(RDD.scala:858)
at org.apache.spark.rdd.RDD$$anonfun$mapPartitionsInternal$1$$anonfun$apply$24.apply(RDD.scala:858)
at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:52)
at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:346)
at org.apache.spark.rdd.RDD.iterator(RDD.scala:310)
at org.apache.spark.sql.execution.SQLExecutionRDD.compute(SQLExecutionRDD.scala:55)
at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:346)
at org.apache.spark.rdd.RDD.iterator(RDD.scala:310)
at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:52)
at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:346)
at org.apache.spark.rdd.RDD.iterator(RDD.scala:310)
at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90)
at org.apache.spark.scheduler.Task.run(Task.scala:123)
at org.apache.spark.executor.Executor$TaskRunner$$anonfun$10.apply(Executor.scala:408)
at org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1360)
at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:414)
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
at java.lang.Thread.run(Thread.java:748)
20/10/16 14:20:38 ERROR Executor: Exception in task 1.0 in stage 6.0 (TID 22)
java.lang.UnsatisfiedLinkError: com.microsoft.ml.lightgbm.lightgbmlibJNI.LGBM_NetworkFree()I
at com.microsoft.ml.lightgbm.lightgbmlibJNI.LGBM_NetworkFree(Native Method)
at com.microsoft.ml.lightgbm.lightgbmlib.LGBM_NetworkFree(lightgbmlib.java:209)
at com.microsoft.ml.spark.lightgbm.TrainUtils$.trainLightGBM(TrainUtils.scala:415)
at com.microsoft.ml.spark.lightgbm.LightGBMBase$$anonfun$6.apply(LightGBMBase.scala:85)
at com.microsoft.ml.spark.lightgbm.LightGBMBase$$anonfun$6.apply(LightGBMBase.scala:85)
at org.apache.spark.sql.execution.MapPartitionsExec$$anonfun$5.apply(objects.scala:188)
at org.apache.spark.sql.execution.MapPartitionsExec$$anonfun$5.apply(objects.scala:185)
at org.apache.spark.rdd.RDD$$anonfun$mapPartitionsInternal$1$$anonfun$apply$24.apply(RDD.scala:858)
at org.apache.spark.rdd.RDD$$anonfun$mapPartitionsInternal$1$$anonfun$apply$24.apply(RDD.scala:858)
at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:52)
at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:346)
at org.apache.spark.rdd.RDD.iterator(RDD.scala:310)
at org.apache.spark.sql.execution.SQLExecutionRDD.compute(SQLExecutionRDD.scala:55)
at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:346)
at org.apache.spark.rdd.RDD.iterator(RDD.scala:310)
at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:52)
at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:346)
at org.apache.spark.rdd.RDD.iterator(RDD.scala:310)
at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90)
at org.apache.spark.scheduler.Task.run(Task.scala:123)
at org.apache.spark.executor.Executor$TaskRunner$$anonfun$10.apply(Executor.scala:408)
at org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1360)
at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:414)
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
at java.lang.Thread.run(Thread.java:748)
20/10/16 14:20:38 ERROR Executor: Exception in task 2.0 in stage 6.0 (TID 23)
java.lang.UnsatisfiedLinkError: com.microsoft.ml.lightgbm.lightgbmlibJNI.LGBM_NetworkFree()I
at com.microsoft.ml.lightgbm.lightgbmlibJNI.LGBM_NetworkFree(Native Method)
at com.microsoft.ml.lightgbm.lightgbmlib.LGBM_NetworkFree(lightgbmlib.java:209)
at com.microsoft.ml.spark.lightgbm.TrainUtils$.trainLightGBM(TrainUtils.scala:415)
at com.microsoft.ml.spark.lightgbm.LightGBMBase$$anonfun$6.apply(LightGBMBase.scala:85)
at com.microsoft.ml.spark.lightgbm.LightGBMBase$$anonfun$6.apply(LightGBMBase.scala:85)
at org.apache.spark.sql.execution.MapPartitionsExec$$anonfun$5.apply(objects.scala:188)
at org.apache.spark.sql.execution.MapPartitionsExec$$anonfun$5.apply(objects.scala:185)
at org.apache.spark.rdd.RDD$$anonfun$mapPartitionsInternal$1$$anonfun$apply$24.apply(RDD.scala:858)
at org.apache.spark.rdd.RDD$$anonfun$mapPartitionsInternal$1$$anonfun$apply$24.apply(RDD.scala:858)
at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:52)
at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:346)
at org.apache.spark.rdd.RDD.iterator(RDD.scala:310)
at org.apache.spark.sql.execution.SQLExecutionRDD.compute(SQLExecutionRDD.scala:55)
at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:346)
at org.apache.spark.rdd.RDD.iterator(RDD.scala:310)
at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:52)
at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:346)
at org.apache.spark.rdd.RDD.iterator(RDD.scala:310)
at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90)
at org.apache.spark.scheduler.Task.run(Task.scala:123)
at org.apache.spark.executor.Executor$TaskRunner$$anonfun$10.apply(Executor.scala:408)
at org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1360)
at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:414)
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
at java.lang.Thread.run(Thread.java:748)
20/10/16 14:20:38 WARN TaskSetManager: Lost task 5.0 in stage 6.0 (TID 26, localhost, executor driver): java.lang.UnsatisfiedLinkError: com.microsoft.ml.lightgbm.lightgbmlibJNI.LGBM_NetworkFree()I
at com.microsoft.ml.lightgbm.lightgbmlibJNI.LGBM_NetworkFree(Native Method)
at com.microsoft.ml.lightgbm.lightgbmlib.LGBM_NetworkFree(lightgbmlib.java:209)
at com.microsoft.ml.spark.lightgbm.TrainUtils$.trainLightGBM(TrainUtils.scala:415)
at com.microsoft.ml.spark.lightgbm.LightGBMBase$$anonfun$6.apply(LightGBMBase.scala:85)
at com.microsoft.ml.spark.lightgbm.LightGBMBase$$anonfun$6.apply(LightGBMBase.scala:85)
at org.apache.spark.sql.execution.MapPartitionsExec$$anonfun$5.apply(objects.scala:188)
at org.apache.spark.sql.execution.MapPartitionsExec$$anonfun$5.apply(objects.scala:185)
at org.apache.spark.rdd.RDD$$anonfun$mapPartitionsInternal$1$$anonfun$apply$24.apply(RDD.scala:858)
at org.apache.spark.rdd.RDD$$anonfun$mapPartitionsInternal$1$$anonfun$apply$24.apply(RDD.scala:858)
at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:52)
at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:346)
at org.apache.spark.rdd.RDD.iterator(RDD.scala:310)
at org.apache.spark.sql.execution.SQLExecutionRDD.compute(SQLExecutionRDD.scala:55)
at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:346)
at org.apache.spark.rdd.RDD.iterator(RDD.scala:310)
at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:52)
at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:346)
at org.apache.spark.rdd.RDD.iterator(RDD.scala:310)
at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90)
at org.apache.spark.scheduler.Task.run(Task.scala:123)
at org.apache.spark.executor.Executor$TaskRunner$$anonfun$10.apply(Executor.scala:408)
at org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1360)
at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:414)
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
at java.lang.Thread.run(Thread.java:748)
20/10/16 14:20:38 ERROR TaskSetManager: Task 5 in stage 6.0 failed 1 times; aborting job
71%|███████ | 17/24 [00:30<00:12, 1.82s/it]
============================== slowest durations ===============================
46.60s call tests/smoke/test_notebooks_pyspark.py::test_als_pyspark_smoke
31.04s call tests/smoke/test_notebooks_pyspark.py::test_mmlspark_lightgbm_criteo_smoke
5.76s call tests/smoke/test_criteo.py::test_criteo_load_spark_df
4.52s call tests/smoke/test_movielens.py::test_load_spark_df[100k-100000-1682-1-Toy Story (1995)-Animation|Children's|Comedy-1995]
3.00s setup tests/smoke/test_criteo.py::test_criteo_load_spark_df
0.57s teardown tests/smoke/test_notebooks_pyspark.py::test_mmlspark_lightgbm_criteo_smoke
(6 durations < 0.005s hidden. Use -vv to show these durations.)
=========================== short test summary info ============================
FAILED tests/smoke/test_notebooks_pyspark.py::test_mmlspark_lightgbm_criteo_smoke
====== 1 failed, 3 passed, 32 deselected, 68 warnings in 95.12s (0:01:35) ======
The solution proposed by @gramhagen was to remove the following files from the DSVM directory `/dsvm/tools/spark/current/jars`:
Azure_mmlspark-0.12.jar
com.microsoft.cntk_cntk-2.4.jar
com.microsoft.ml.lightgbm_lightgbmlib-2.0.120.jar
Banibrata De is looking into removing these files from the default DSVM image.
conda activate reco_base
pytest tests/integration -m "integration and not spark and not gpu" --durations 0 --disable-warnings
============================= test session starts ==============================
platform linux -- Python 3.6.11, pytest-6.1.1, py-1.9.0, pluggy-0.13.1
rootdir: /home/recocat/notebooks/repos/recommenders
collected 42 items / 20 deselected / 22 selected
tests/integration/test_covid.py s [ 4%]
tests/integration/test_criteo.py . [ 9%]
tests/integration/test_mind.py .. [ 18%]
tests/integration/test_movielens.py ......... [ 59%]
tests/integration/test_notebooks_python.py ......... [100%]
============================== slowest durations ===============================
3186.29s call tests/integration/test_notebooks_python.py::test_vw_deep_dive_integration[1m-expected_values0]
1007.18s call tests/integration/test_notebooks_python.py::test_geoimc_integration[expected_values0]
787.81s call tests/integration/test_notebooks_python.py::test_sar_single_node_integration[10m-expected_values1]
508.86s call tests/integration/test_notebooks_python.py::test_nni_tuning_svd
429.95s call tests/integration/test_notebooks_python.py::test_surprise_svd_integration[1m-expected_values0]
422.72s call tests/integration/test_criteo.py::test_criteo_load_pandas_df
250.70s call tests/integration/test_movielens.py::test_load_pandas_df[20m-20000263-27278-1-Toy Story (1995)-Adventure|Animation|Children|Comedy|Fantasy-1995]
178.40s call tests/integration/test_notebooks_python.py::test_baseline_deep_dive_integration[1m-expected_values0]
162.74s call tests/integration/test_notebooks_python.py::test_cornac_bpr_integration[1m-expected_values0]
127.83s call tests/integration/test_movielens.py::test_load_pandas_df[10m-10000054-10681-1-Toy Story (1995)-Adventure|Animation|Children|Comedy|Fantasy-1995]
64.01s call tests/integration/test_notebooks_python.py::test_sar_single_node_integration[1m-expected_values0]
44.38s call tests/integration/test_mind.py::test_extract_mind
35.77s call tests/integration/test_mind.py::test_download_mind
19.55s call tests/integration/test_notebooks_python.py::test_wikidata_integration
13.74s call tests/integration/test_movielens.py::test_load_pandas_df[1m-1000209-3883-1-Toy Story (1995)-Animation|Children's|Comedy-1995]
6.95s call tests/integration/test_movielens.py::test_download_and_extract_movielens[20m]
6.64s call tests/integration/test_movielens.py::test_load_item_df[20m-27278-1-Toy Story (1995)-Adventure|Animation|Children|Comedy|Fantasy-1995]
2.93s call tests/integration/test_movielens.py::test_download_and_extract_movielens[10m]
2.63s call tests/integration/test_movielens.py::test_load_item_df[10m-10681-1-Toy Story (1995)-Adventure|Animation|Children|Comedy|Fantasy-1995]
0.55s call tests/integration/test_movielens.py::test_load_item_df[1m-3883-1-Toy Story (1995)-Animation|Children's|Comedy-1995]
0.47s call tests/integration/test_movielens.py::test_download_and_extract_movielens[1m]
0.11s teardown tests/integration/test_movielens.py::test_download_and_extract_movielens[20m]
0.11s teardown tests/integration/test_movielens.py::test_load_item_df[20m-27278-1-Toy Story (1995)-Adventure|Animation|Children|Comedy|Fantasy-1995]
0.08s teardown tests/integration/test_movielens.py::test_load_pandas_df[20m-20000263-27278-1-Toy Story (1995)-Adventure|Animation|Children|Comedy|Fantasy-1995]
0.05s teardown tests/integration/test_movielens.py::test_load_item_df[10m-10681-1-Toy Story (1995)-Adventure|Animation|Children|Comedy|Fantasy-1995]
0.05s teardown tests/integration/test_movielens.py::test_download_and_extract_movielens[10m]
0.04s teardown tests/integration/test_movielens.py::test_load_pandas_df[10m-10000054-10681-1-Toy Story (1995)-Adventure|Animation|Children|Comedy|Fantasy-1995]
0.01s teardown tests/integration/test_notebooks_python.py::test_nni_tuning_svd
0.01s teardown tests/integration/test_movielens.py::test_load_pandas_df[1m-1000209-3883-1-Toy Story (1995)-Animation|Children's|Comedy-1995]
(36 durations < 0.005s hidden. Use -vv to show these durations.)
==== 21 passed, 1 skipped, 20 deselected, 61 warnings in 7263.51s (2:01:03) ====
conda activate reco_gpu
pytest tests/integration -m "integration and not spark and gpu" --durations 0 --disable-warnings
============================= test session starts ==============================
platform linux -- Python 3.6.11, pytest-6.1.1, py-1.9.0, pluggy-0.13.1
rootdir: /home/recocat/notebooks/repos/recommenders
collected 42 items / 29 deselected / 13 selected
tests/integration/test_notebooks_gpu.py ............. [100%]
============================== slowest durations ===============================
1506.74s call tests/integration/test_notebooks_gpu.py::test_wide_deep_integration[1m-50000-expected_values0-42]
1104.02s call tests/integration/test_notebooks_gpu.py::test_naml_quickstart_integration[6-42-demo-expected_values0]
796.34s call tests/integration/test_notebooks_gpu.py::test_ncf_integration[1m-10-expected_values0-42]
643.41s call tests/integration/test_notebooks_gpu.py::test_lstur_quickstart_integration[5-40-demo-expected_values0]
641.30s call tests/integration/test_notebooks_gpu.py::test_fastai_integration[1m-10-expected_values0]
617.79s call tests/integration/test_notebooks_gpu.py::test_npa_quickstart_integration[6-42-demo-expected_values0]
563.02s call tests/integration/test_notebooks_gpu.py::test_nrms_quickstart_integration[8-42-demo-expected_values0]
390.31s call tests/integration/test_notebooks_gpu.py::test_xdeepfm_integration[15-10-expected_values0-42]
251.22s call tests/integration/test_notebooks_gpu.py::test_ncf_deep_dive_integration[100k-10-512-expected_values0-42]
234.52s call tests/integration/test_notebooks_gpu.py::test_slirec_quickstart_integration[reco_utils/recommender/deeprec/config/sli_rec.yaml-tests/resources/deeprec/slirec-10-400-expected_values0-2019]
77.90s call tests/integration/test_notebooks_gpu.py::test_dkn_quickstart_integration
19.32s call tests/integration/test_notebooks_gpu.py::test_lightgcn_deep_dive_integration[reco_utils/recommender/deeprec/config/lightgcn.yaml-tests/resources/deeprec/lightgcn-100k-5-1024-expected_values0-42]
0.11s teardown tests/integration/test_notebooks_gpu.py::test_wide_deep_integration[1m-50000-expected_values0-42]
0.10s call tests/integration/test_notebooks_gpu.py::test_gpu_vm
(25 durations < 0.005s hidden. Use -vv to show these durations.)
========= 13 passed, 29 deselected, 65 warnings in 6848.94s (1:54:08) ==========
conda activate reco_pyspark
pytest tests/integration -m "integration and spark and not gpu" --durations 0 --disable-warnings
============================= test session starts ==============================
platform linux -- Python 3.6.11, pytest-6.1.1, py-1.9.0, pluggy-0.13.1
rootdir: /home/recocat/notebooks/repos/recommenders
collected 42 items / 36 deselected / 6 selected
tests/integration/test_criteo.py . [ 16%]
tests/integration/test_movielens.py ... [ 66%]
tests/integration/test_notebooks_pyspark.py .s [100%]
============================== slowest durations ===============================
367.61s call tests/integration/test_criteo.py::test_criteo_load_spark_df
107.02s call tests/integration/test_movielens.py::test_load_spark_df[20m-20000263-27278-1-Toy Story (1995)-Adventure|Animation|Children|Comedy|Fantasy-1995]
101.66s call tests/integration/test_movielens.py::test_load_spark_df[10m-10000054-10681-1-Toy Story (1995)-Adventure|Animation|Children|Comedy|Fantasy-1995]
92.42s call tests/integration/test_notebooks_pyspark.py::test_als_pyspark_integration
19.05s call tests/integration/test_movielens.py::test_load_spark_df[1m-1000209-3883-1-Toy Story (1995)-Animation|Children's|Comedy-1995]
2.94s setup tests/integration/test_criteo.py::test_criteo_load_spark_df
0.72s teardown tests/integration/test_notebooks_pyspark.py::test_mmlspark_lightgbm_criteo_integration
0.08s teardown tests/integration/test_movielens.py::test_load_spark_df[20m-20000263-27278-1-Toy Story (1995)-Adventure|Animation|Children|Comedy|Fantasy-1995]
0.04s teardown tests/integration/test_movielens.py::test_load_spark_df[10m-10000054-10681-1-Toy Story (1995)-Adventure|Animation|Children|Comedy|Fantasy-1995]
0.01s teardown tests/integration/test_movielens.py::test_load_spark_df[1m-1000209-3883-1-Toy Story (1995)-Animation|Children's|Comedy-1995]
(7 durations < 0.005s hidden. Use -vv to show these durations.)
===== 5 passed, 1 skipped, 36 deselected, 54 warnings in 694.28s (0:11:34) =====
"col_rating": DEFAULT_RATING_COL,
}
# instantiate the the affinity matrix
am = AffinityMatrix(DF=python_dataset, **header)
# generate the sparse matrix representation
X = am.gen_affinity_matrix()
# use the inverse function to generate a pandas df from a sparse matrix ordered by userID
> DF = am.map_back_sparse(X, kind="ratings")
tests/unit/test_sparse.py:120:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <reco_utils.dataset.sparse.AffinityMatrix object at 0x7fd5f82dddd8>
X = (array([[5, 5, 1, 3, 4, 4, 4, 1, 5, 2, 1, 4, 3, 2, 5, 3, 2, 2, 4, 2, 3, 2,
4, 2, 4, 2, 1, 3, 3, 1, 1, 2, 1, 1,..., 3, 0, 1, 0, 5, 2, 2, 4,
3, 3, 4, 0, 1, 0]]), {1: 0, 2: 1, 3: 2, 4: 3, ...}, {1: 0, 2: 23, 3: 24, 4: 25, ...})
kind = 'ratings'
def map_back_sparse(self, X, kind):
"""Map back the user/affinity matrix to a pd dataframe
Args:
X (np.array, int32): user/item affinity matrix
kind (string): specify if the output values are ratings or predictions
Returns:
pd.DataFrame: the generated pandas dataframe
"""
> m, n = X.shape
E AttributeError: 'tuple' object has no attribute 'shape'
reco_utils/dataset/sparse.py:150: AttributeError
Fixed with https://github.com/microsoft/recommenders/pull/1243.
There is a remaining issue with cuDNN on the dsvm_nightly_linux_gpu build.
Description
from https://github.com/microsoft/recommenders/tree/master/tests
Make sure all tests pass:
Unit:
Smoke:
Integration:
Expected behavior with the suggested feature
Other Comments