PennyWieser / Thermobar

Python thermobarometry tool
40 stars 10 forks source link

Trouble using the model of Jorgenson et al., 2022 #42

Closed CHNxzr closed 8 months ago

CHNxzr commented 8 months ago

I am having difficulty using Jorgenson's model. It always raises a ValueError. [Screenshot: Thermobar_1]

PennyWieser commented 8 months ago

Hi, can you attach or email the notebook and any files it needs to read so I can diagnose further? Also, what version of sklearn are you on, and what version of Thermobar?

CHNxzr commented 8 months ago

The version of Thermobar is 1.0.35, and sklearn is 1.3.0. Here is the file that I input: Cpx_Opx_thermobar.xlsx.

PennyWieser commented 8 months ago

Can you also provide the code that is giving the error as well as the other code leading up to that point?

CHNxzr commented 8 months ago

Sure. P_T_EqTests_pkl=pt.calculate_cpx_liq_press_temp(cpx_comps=Cpxs, liq_comps=Liqs, equationP="P_Jorgenson2022_Cpx_Liq", equationT="T_Jorgenson2022_Cpx_Liq", H2O_Liq=3, eq_tests=False)


ValueError Traceback (most recent call last) Cell In[15], line 1 ----> 1 P_T_EqTests_pkl=pt.calculate_cpx_liq_press_temp(cpx_comps=Cpxs, liq_comps=Liqs, 2 equationP="P_Jorgenson2022_Cpx_Liq", 3 equationT="T_Jorgenson2022_Cpx_Liq", 4 H2O_Liq=3, eq_tests=False)

File D:\anaconda3\Lib\site-packages\Thermobar\clinopyroxene_thermobarometry.py:2390, in calculate_cpx_liq_press_temp(liq_comps, cpx_comps, meltmatch, equationP, equationT, T, P, iterations, Fe3Fet_Liq, H2O_Liq, T_K_guess, eq_tests) 2388 if equationT is not None: 2389 if ('Petrelli' in equationT or "Jorgenson" in equationT) and "onnx" not in equationT: -> 2390 T_func_all=calculate_cpx_liq_temp(meltmatch=Combo_liq_cpxs, 2391 equationT=equationT, P="Solve") 2392 T_func = T_func_all.T_K_calc 2393 Median_T=T_func_all.Median_Trees

File D:\anaconda3\Lib\site-packages\Thermobar\clinopyroxene_thermobarometry.py:2214, in calculate_cpx_liq_temp(equationT, cpx_comps, liq_comps, meltmatch, P, eq_tests, H2O_Liq, Fe3Fet_Liq, sigma, Kd_Err) 2212 # Easiest to treat Machine Learning ones differently 2213 if ('Petrelli' in equationT or "Jorgenson" in equationT) and "onnx" not in equationT: -> 2214 df_stats=func(meltmatch=Combo_liq_cpxs) 2215 T_K=df_stats['T_K_calc'] 2217 elif ('Petrelli' in equationT or "Jorgenson" in equationT) and "onnx" in equationT:

File D:\anaconda3\Lib\site-packages\Thermobar\clinopyroxene_thermobarometry.py:943, in T_Jorgenson2022_Cpx_Liq(P, cpx_comps, liq_comps, meltmatch) 939 with open(Thermobar_dir/'ETR_Temp_Jorg21_Cpx_Liq_NotNorm_sklearn_1_3.pkl', 'rb') as f: 940 ETR_Temp_J22_Cpx_Liq=joblib.load(f) --> 943 Pred_T_K=ETR_Temp_J22_Cpx_Liq.predict(x_test) 944 df_stats, df_voting=get_voting_stats_ExtraTreesRegressor(x_test, ETR_Temp_J22_Cpx_Liq) 945 df_stats.insert(0, 'T_K_calc', Pred_T_K)

File D:\anaconda3\Lib\site-packages\sklearn\ensemble\_forest.py:984, in ForestRegressor.predict(self, X) 982 check_is_fitted(self) 983 # Check data --> 984 X = self._validate_X_predict(X) 986 # Assign chunk of trees to jobs 987 n_jobs, _, _ = _partition_estimators(self.n_estimators, self.n_jobs)

File D:\anaconda3\Lib\site-packages\sklearn\ensemble\_forest.py:599, in BaseForest._validate_X_predict(self, X) 596 """ 597 Validate X whenever one tries to predict, apply, predict_proba.""" 598 check_is_fitted(self) --> 599 X = self._validate_data(X, dtype=DTYPE, accept_sparse="csr", reset=False) 600 if issparse(X) and (X.indices.dtype != np.intc or X.indptr.dtype != np.intc): 601 raise ValueError("No support for np.int64 index based sparse matrices")

File D:\anaconda3\Lib\site-packages\sklearn\base.py:604, in BaseEstimator._validate_data(self, X, y, reset, validate_separately, cast_to_ndarray, **check_params) 602 out = X, y 603 elif not no_val_X and no_val_y: --> 604 out = check_array(X, input_name="X", **check_params) 605 elif no_val_X and not no_val_y: 606 out = _check_y(y, **check_params)

File D:\anaconda3\Lib\site-packages\sklearn\utils\validation.py:959, in check_array(array, accept_sparse, accept_large_sparse, dtype, order, copy, force_all_finite, ensure_2d, allow_nd, ensure_min_samples, ensure_min_features, estimator, input_name) 953 raise ValueError( 954 "Found array with dim %d. %s expected <= 2." 955 % (array.ndim, estimator_name) 956 ) 958 if force_all_finite: --> 959 _assert_all_finite( 960 array, 961 input_name=input_name, 962 estimator_name=estimator_name, 963 allow_nan=force_all_finite == "allow-nan", 964 ) 966 if ensure_min_samples > 0: 967 n_samples = _num_samples(array)

File D:\anaconda3\Lib\site-packages\sklearn\utils\validation.py:124, in _assert_all_finite(X, allow_nan, msg_dtype, estimator_name, input_name) 121 if first_pass_isfinite: 122 return --> 124 _assert_all_finite_element_wise( 125 X, 126 xp=xp, 127 allow_nan=allow_nan, 128 msg_dtype=msg_dtype, 129 estimator_name=estimator_name, 130 input_name=input_name, 131 )

File D:\anaconda3\Lib\site-packages\sklearn\utils\validation.py:173, in _assert_all_finite_element_wise(X, xp, allow_nan, msg_dtype, estimator_name, input_name) 156 if estimator_name and input_name == "X" and has_nan_error: 157 # Improve the error message on how to handle missing values in 158 # scikit-learn. 159 msg_err += ( 160 f"\n{estimator_name} does not accept missing values" 161 " encoded as NaN natively. For supervised learning, you might want" (...) 171 "#estimators-that-handle-nan-values" 172 ) --> 173 raise ValueError(msg_err)

ValueError: Input X contains NaN. ExtraTreesRegressor does not accept missing values encoded as NaN natively. For supervised learning, you might want to consider sklearn.ensemble.HistGradientBoostingClassifier and Regressor which accept missing values encoded as NaNs natively. Alternatively, it is possible to preprocess the data, for instance by using an imputer transformer in a pipeline or drop samples with missing values. See https://scikit-learn.org/stable/modules/impute.html You can find a list of all estimators that handle NaN values at the following page: https://scikit-learn.org/stable/modules/impute.html#estimators-that-handle-nan-values

PennyWieser commented 8 months ago

Can you please send the entire code up to this error so I can check the loading steps? I can't troubleshoot with just this. The NaN part of the error makes it seem like an issue related to loading.

CHNxzr commented 8 months ago

Of course.

```python
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import Thermobar as pt
pt.__version__
```

'1.0.35'

```python
out=pt.import_excel('Cpx_Opx_thermobar.xlsx', sheet_name="Sheet1")
my_input=out['my_input']
Liqs=out['Liqs']
Cpxs=out['Cpxs']
```

```python
import joblib as j
j.__version__
```

'1.2.0'

P_T_EqTests_pkl=pt.calculate_cpx_liq_press_temp(cpx_comps=Cpxs, liq_comps=Liqs, equationP="P_Jorgenson2022_Cpx_Liq", equationT="T_Jorgenson2022_Cpx_Liq", H2O_Liq=0, eq_tests=False) Im normalizing using the Jorgenson method, e.g. 100 total, 2dp Im normalizing using the Jorgenson method, e.g. 100 total, 2dp Youve selected a P-independent function


ValueError Traceback (most recent call last) Cell In[4], line 1 ----> 1 P_T_EqTests_pkl=pt.calculate_cpx_liq_press_temp(cpx_comps=Cpxs, liq_comps=Liqs, 2 equationP="P_Jorgenson2022_Cpx_Liq", 3 equationT="T_Jorgenson2022_Cpx_Liq", 4 H2O_Liq=0, eq_tests=False)

File D:\anaconda3\Lib\site-packages\Thermobar\clinopyroxene_thermobarometry.py:2390, in calculate_cpx_liq_press_temp(liq_comps, cpx_comps, meltmatch, equationP, equationT, T, P, iterations, Fe3Fet_Liq, H2O_Liq, T_K_guess, eq_tests) 2388 if equationT is not None: 2389 if ('Petrelli' in equationT or "Jorgenson" in equationT) and "onnx" not in equationT: -> 2390 T_func_all=calculate_cpx_liq_temp(meltmatch=Combo_liq_cpxs, 2391 equationT=equationT, P="Solve") 2392 T_func = T_func_all.T_K_calc 2393 Median_T=T_func_all.Median_Trees

File D:\anaconda3\Lib\site-packages\Thermobar\clinopyroxene_thermobarometry.py:2214, in calculate_cpx_liq_temp(equationT, cpx_comps, liq_comps, meltmatch, P, eq_tests, H2O_Liq, Fe3Fet_Liq, sigma, Kd_Err) 2212 # Easiest to treat Machine Learning ones differently 2213 if ('Petrelli' in equationT or "Jorgenson" in equationT) and "onnx" not in equationT: -> 2214 df_stats=func(meltmatch=Combo_liq_cpxs) 2215 T_K=df_stats['T_K_calc'] 2217 elif ('Petrelli' in equationT or "Jorgenson" in equationT) and "onnx" in equationT:

File D:\anaconda3\Lib\site-packages\Thermobar\clinopyroxene_thermobarometry.py:943, in T_Jorgenson2022_Cpx_Liq(P, cpx_comps, liq_comps, meltmatch) 939 with open(Thermobar_dir/'ETR_Temp_Jorg21_Cpx_Liq_NotNorm_sklearn_1_3.pkl', 'rb') as f: 940 ETR_Temp_J22_Cpx_Liq=joblib.load(f) --> 943 Pred_T_K=ETR_Temp_J22_Cpx_Liq.predict(x_test) 944 df_stats, df_voting=get_voting_stats_ExtraTreesRegressor(x_test, ETR_Temp_J22_Cpx_Liq) 945 df_stats.insert(0, 'T_K_calc', Pred_T_K)

File D:\anaconda3\Lib\site-packages\sklearn\ensemble\_forest.py:984, in ForestRegressor.predict(self, X) 982 check_is_fitted(self) 983 # Check data --> 984 X = self._validate_X_predict(X) 986 # Assign chunk of trees to jobs 987 n_jobs, _, _ = _partition_estimators(self.n_estimators, self.n_jobs)

File D:\anaconda3\Lib\site-packages\sklearn\ensemble\_forest.py:599, in BaseForest._validate_X_predict(self, X) 596 """ 597 Validate X whenever one tries to predict, apply, predict_proba.""" 598 check_is_fitted(self) --> 599 X = self._validate_data(X, dtype=DTYPE, accept_sparse="csr", reset=False) 600 if issparse(X) and (X.indices.dtype != np.intc or X.indptr.dtype != np.intc): 601 raise ValueError("No support for np.int64 index based sparse matrices")

File D:\anaconda3\Lib\site-packages\sklearn\base.py:604, in BaseEstimator._validate_data(self, X, y, reset, validate_separately, cast_to_ndarray, **check_params) 602 out = X, y 603 elif not no_val_X and no_val_y: --> 604 out = check_array(X, input_name="X", **check_params) 605 elif no_val_X and not no_val_y: 606 out = _check_y(y, **check_params)

File D:\anaconda3\Lib\site-packages\sklearn\utils\validation.py:959, in check_array(array, accept_sparse, accept_large_sparse, dtype, order, copy, force_all_finite, ensure_2d, allow_nd, ensure_min_samples, ensure_min_features, estimator, input_name) 953 raise ValueError( 954 "Found array with dim %d. %s expected <= 2." 955 % (array.ndim, estimator_name) 956 ) 958 if force_all_finite: --> 959 _assert_all_finite( 960 array, 961 input_name=input_name, 962 estimator_name=estimator_name, 963 allow_nan=force_all_finite == "allow-nan", 964 ) 966 if ensure_min_samples > 0: 967 n_samples = _num_samples(array)

File D:\anaconda3\Lib\site-packages\sklearn\utils\validation.py:124, in _assert_all_finite(X, allow_nan, msg_dtype, estimator_name, input_name) 121 if first_pass_isfinite: 122 return --> 124 _assert_all_finite_element_wise( 125 X, 126 xp=xp, 127 allow_nan=allow_nan, 128 msg_dtype=msg_dtype, 129 estimator_name=estimator_name, 130 input_name=input_name, 131 )

File D:\anaconda3\Lib\site-packages\sklearn\utils\validation.py:173, in _assert_all_finite_element_wise(X, xp, allow_nan, msg_dtype, estimator_name, input_name) 156 if estimator_name and input_name == "X" and has_nan_error: 157 # Improve the error message on how to handle missing values in 158 # scikit-learn. 159 msg_err += ( 160 f"\n{estimator_name} does not accept missing values" 161 " encoded as NaN natively. For supervised learning, you might want" (...) 171 "#estimators-that-handle-nan-values" 172 ) --> 173 raise ValueError(msg_err)

ValueError: Input X contains NaN. ExtraTreesRegressor does not accept missing values encoded as NaN natively. For supervised learning, you might want to consider sklearn.ensemble.HistGradientBoostingClassifier and Regressor which accept missing values encoded as NaNs natively. Alternatively, it is possible to preprocess the data, for instance by using an imputer transformer in a pipeline or drop samples with missing values. See https://scikit-learn.org/stable/modules/impute.html You can find a list of all estimators that handle NaN values at the following page: https://scikit-learn.org/stable/modules/impute.html#estimators-that-handle-nan-values

PennyWieser commented 8 months ago

Hi, the issue is that your liquid dataframe is empty.

image

In future, I recommend you inspect all the dataframes you are inputting after loading them. The Excel file you attached doesn't even have any liquids in it.

If you want to use the Cpx only model, make sure to call that instead.