Hey! I've been trying to run this script, but I am running into some trouble, which I suspect may be due to a mismatch in library versions.

The listed requirements are: DeepChem 1.x (requires RDKit), Pandas (prediction is tested with Pandas 0.22), TensorFlow 1.3, Keras, XGBoost, and scikit-learn.

The libraries that I have installed are (pip):

(test-env) benjamin@benjamin-T460:~/owen$ pip list
(test-env) benjamin@benjamin-T460:~/owen$ pip list
Package                 Version
----------------------- ---------------
apturl                  0.5.2
asttokens               2.0.8
backcall                0.2.0
bcrypt                  3.2.0
blinker                 1.4
Brlapi                  0.8.3
certifi                 2020.6.20
chardet                 4.0.0
cli-helpers             2.2.1
click                   8.0.3
cmake                   3.22.5
colorama                0.4.4
command-not-found       0.3
configobj               5.0.6
cryptography            3.4.8
cupshelpers             1.0
dbus-python             1.2.18
decorator               5.1.1
defer                   1.0.6            0.1.1
distlib                 0.3.5
distro                  1.7.0
distro-info             1.1build1
dlib                    19.24.0
duplicity               0.8.21
executing               1.1.0
face-recognition        1.3.0
face-recognition-models 0.3.0
fasteners               0.14.1
filelock                3.7.1
Flask                   2.1.2
Flask-Cors              3.0.10
frida                   15.1.22
frida-tools             10.6.1
future                  0.18.2
gpg                     1.16.0-unknown
httplib2                0.20.2
idna                    3.3
importlib-metadata      4.6.4
ipython                 8.5.0
itsdangerous            2.1.2
jedi                    0.18.1
jeepney                 0.7.1
Jinja2                  3.1.2
keyring                 23.5.0
language-selector       0.1
launchpadlib            1.10.16
lazr.restfulclient      0.14.4
lazr.uri                1.0.6
litecli                 1.8.0
lockfile                0.12.2
louis                   3.20.0
macaroonbakery          1.3.1
Mako                    1.1.3
MarkupSafe              2.0.1
matplotlib-inline       0.1.6
monotonic               1.6
more-itertools          8.10.0
mysql-connector-python  8.0.30
netifaces               0.11.0
numpy                   1.23.1
oauthlib                3.2.0
objection               1.11.0
olefile                 0.46
paramiko                2.9.3
parso                   0.8.3
pexpect                 4.8.0
pickleshare             0.7.5
Pillow                  9.0.1
pip                     22.2.2
pipenv                  2022.7.4
platformdirs            2.5.2
prompt-toolkit          3.0.29
protobuf                3.12.4
ptyprocess              0.7.0
pure-eval               0.2.2
pycairo                 1.20.1
pycups                  2.0.1
Pygments                2.12.0
PyGObject               3.42.1
PyJWT                   2.4.0
pymacaroons             0.13.0
PyNaCl                  1.5.0
pyparsing               2.4.7
PyPDF2                  2.8.1
pyRFC3339               1.1
python-apt              2.3.0+ubuntu2.1
python-dateutil         2.8.1
python-debian           0.1.43ubuntu1
python-dotenv           0.21.0
pytz                    2022.1
pyxdg                   0.27
PyYAML                  5.4.1
rdkit                   2022.3.5
reportlab               3.6.8
requests                2.25.1
SecretStorage           3.3.1
semver                  2.13.0
setuptools              59.6.0
six                     1.16.0
sqlparse                0.4.2
stack-data              0.5.1
systemd-python          234
tabulate                0.8.9
traitlets               5.4.0
ubuntu-advantage-tools  27.9
ubuntu-drivers-common   0.0.0
ufw                     0.36.1
unattended-upgrades     0.1
urllib3                 1.26.5
usb-creator             0.3.7
vboxapi                 1.0
virtualenv              20.15.1
virtualenv-clone        0.5.7
wadllib                 1.3.6
wcwidth                 0.2.5
websockets              10.3
Werkzeug                2.1.2
wheel                   0.37.1
xdg                     5
xkit                    0.0.0
zipp                    1.0.0
(test-env) benjamin@benjamin-T460:~/owen$ conda list
# packages in environment at /home/benjamin/anaconda3/envs/test-env:
# Name                    Version                   Build  Channel
_libgcc_mutex             0.1                        main  
_openmp_mutex             5.1                       1_gnu  
absl-py                   0.15.0                   pypi_0    pypi
astor                     0.8.1                    pypi_0    pypi
backports                 1.0                        py_2    conda-forge
backports-functools-lru-cache 1.6.4                    pypi_0    pypi
backports-shutil-get-terminal-size 1.0.0                    pypi_0    pypi
backports.weakref         1.0rc1                   py27_1    conda-forge
blas                      1.0                         mkl  
bleach                    1.5.0                    py27_0    conda-forge
blosc                     1.21.0               h8c45485_0  
boost                     1.63.0           py27hd84f722_1    rdkit
bzip2                     1.0.8                h7b6447c_0  
ca-certificates           2022.07.19           h06a4308_0  
cairo                     1.16.0               hf32fb01_1  
certifi                   2020.6.20          pyhd3eb1b0_3  
cycler                    0.10.0                   pypi_0    pypi
decorator                 4.4.2                    pypi_0    pypi
deepchem                  1.3.2.dev290             pypi_0    pypi
eigen                     3.3.7                hd09550d_1  
enum34                    1.1.10                   pypi_0    pypi
fftw3f                    3.3.4                         2    omnia
fontconfig                2.13.1               h6c09931_0  
freetype                  2.11.0               h70c0345_0  
funcsigs                  1.0.2                      py_3    conda-forge
futures                   3.3.0                    pypi_0    pypi
gast                      0.5.3                    pypi_0    pypi
glib                      2.69.1               h4ff587b_1  
grpcio                    1.41.1                   pypi_0    pypi
h5py                      2.10.0                   pypi_0    pypi
hdf5                      1.10.4               hb1b8bf9_0  
html5lib                  0.9999999                py27_0    conda-forge
icu                       58.2                 he6710b0_3  
intel-openmp              2022.0.1          h06a4308_3633  
ipython                   5.5.0                    pypi_0    pypi
ipython-genutils          0.2.0                    pypi_0    pypi
joblib                    0.13.0                   pypi_0    pypi
jpeg                      9e                   h7f8727e_0  
keras                     2.2.4                    pypi_0    pypi
keras-applications        1.0.8                    pypi_0    pypi
keras-preprocessing       1.1.2                    pypi_0    pypi
lerc                      3.0                  h295c915_0  
libboost                  1.65.1               habcd387_4  
libdeflate                1.8                  h7f8727e_5  
libffi                    3.3                  he6710b0_2  
libgcc-ng                 11.2.0               h1234567_1  
libgfortran-ng            7.5.0               ha8ba4b0_17  
libgfortran4              7.5.0               ha8ba4b0_17  
libgomp                   11.2.0               h1234567_1  
libpng                    1.6.37               hbc83047_0  
libprotobuf               3.11.4               h8b12597_0    conda-forge
libstdcxx-ng              11.2.0               h1234567_1  
libtiff                   4.2.0                h85742a9_0  
libuuid                   1.0.3                h7f8727e_2  
libwebp-base              1.2.2                h7f8727e_0  
libxcb                    1.15                 h7f8727e_0  
libxml2                   2.9.12               h03d6c58_0  
lz4-c                     1.9.3                h295c915_1  
lzo                       2.10                 h7b6447c_2  
markdown                  3.1.1                      py_0    conda-forge
matplotlib                2.1.2                    pypi_0    pypi
mdtraj                    1.9.1                    py27_1    deepchem
mkl                       2020.2                      256  
mkl-service               2.3.0            py27he904b0f_0  
mkl_fft                   1.0.15           py27ha843d7b_0  
mkl_random                1.1.0            py27hd6b4f25_0  
mock                      3.0.5            py27h8c360ce_1    conda-forge
ncurses                   6.3                  h5eee18b_3  
numexpr                   2.7.0            py27h9e4a6bb_0  
numpy                     1.16.6                   pypi_0    pypi
numpy-base                1.16.6           py27hde5b4d6_0  
olefile                   0.46                     py27_0  
openmm                    7.3.1           py27_cuda92_rc_2    omnia
pandas                    0.22.0                   pypi_0    pypi
pathlib2                  2.3.7.post1              pypi_0    pypi
pcre                      8.45                 h295c915_0  
pdbfixer                  1.4                      py27_0    omnia
pexpect                   4.8.0                    pypi_0    pypi
pickleshare               0.7.5                    pypi_0    pypi
pillow                    6.2.1            py27h34e0f95_0  
pip                       19.3.1                   py27_0  
pixman                    0.40.0               h7f8727e_1  
prompt-toolkit            1.0.18                   pypi_0    pypi
protobuf                  3.11.4           py27he1b5a44_0    conda-forge
ptyprocess                0.7.0                    pypi_0    pypi
py-boost                  1.65.1           py27hf484d3e_4  
pygments                  2.5.2                    pypi_0    pypi
pyparsing                 2.4.7                    pypi_0    pypi
pytables                  3.5.2            py27h71ec239_1  
python                    2.7.18               ha1903f6_2  
python-dateutil           2.8.2              pyhd3eb1b0_0  
python_abi                2.7                    1_cp27mu    conda-forge
pytz                      2021.3             pyhd3eb1b0_0  
pyyaml                    5.4.1                    pypi_0    pypi
rdkit                     2018.09.1.0      py27h71b666b_1    rdkit
readline                  8.1.2                h7f8727e_1  
scandir                   1.10.0                   pypi_0    pypi
scikit-learn              0.19.1                   pypi_0    pypi
scipy                     1.2.3                    pypi_0    pypi
setuptools                44.0.0                   py27_0  
simdna                    0.4.2                    pypi_0    pypi
simplegeneric             0.8.1                    pypi_0    pypi
six                       1.16.0             pyhd3eb1b0_1  
sqlite                    3.38.2               hc218d9a_0  
subprocess32              3.5.4                    pypi_0    pypi
tensorboard               1.6.0                    pypi_0    pypi
tensorflow                1.6.0                    pypi_0    pypi
tensorflow-estimator      1.13.0                   pypi_0    pypi
tensorflow-tensorboard    1.5.1                    pypi_0    pypi
termcolor                 1.1.0                    pypi_0    pypi
tk                        8.6.11               h1ccaba5_0  
traitlets                 4.3.3                    pypi_0    pypi
typing                           pypi_0    pypi
wcwidth                   0.2.5                    pypi_0    pypi
webencodings              0.5.1                      py_1    conda-forge
werkzeug                  1.0.1              pyh9f0ad1d_0    conda-forge
wheel                     0.37.1             pyhd3eb1b0_0  
xgboost                   0.80                     pypi_0    pypi
xz                        5.2.6                h5eee18b_0  
zlib                      1.2.11               h7f8727e_4  
zstd                      1.4.9                haebb681_0  

I am running the script in a conda environment.

Thank you very much for your time. I appreciate that, even with this repository deactivated, you are still helping the community :)

ghost commented 2 years ago

And, just as an addendum, the error/output that I get is:

(test-env) benjamin@benjamin-T460:~/owen/GPCR_LigandClassify$ python --input_file sample_input.csv --output_file output.csv --n_rows_to_read 1200
/home/benjamin/anaconda3/envs/test-env/lib/python2.7/site-packages/sklearn/ensemble/ DeprecationWarning: numpy.core.umath_tests is an internal NumPy module and should not be imported. It will be removed in a future NumPy release.
  from numpy.core.umath_tests import inner1d
Using TensorFlow backend.
PANDAS version  0.22.0
inputfile: /home/benjamin/owen/GPCR_LigandClassify/sample_input.csv
outputfile: /home/benjamin/owen/GPCR_LigandClassify/output_2022-10-07.csv SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation:
  drug_bank_df_selected_cols.dropna(subset = ['smiles'],inplace=True)
Featurizing sample 0
[21:04:14] Explicit valence for atom # 2 O, 3, is greater than permitted
RDKit ERROR: [21:04:14] Explicit valence for atom # 2 O, 3, is greater than permitted
[21:04:17] Explicit valence for atom # 0 N, 4, is greater than permitted
RDKit ERROR: [21:04:17] Explicit valence for atom # 0 N, 4, is greater than permitted
Featurizing sample 1000
[21:04:30] Explicit valence for atom # 2 O, 3, is greater than permitted
RDKit ERROR: [21:04:30] Explicit valence for atom # 2 O, 3, is greater than permitted
[21:04:31] Explicit valence for atom # 0 N, 4, is greater than permitted
RDKit ERROR: [21:04:31] Explicit valence for atom # 0 N, 4, is greater than permitted
/home/benjamin/anaconda3/envs/test-env/lib/python2.7/site-packages/rdkit/Chem/ SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation:
  lambda smiles: _MolPlusFingerprint(Chem.MolFromSmiles(smiles))) SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation:
2022-10-07 21:04:34.752815: I tensorflow/core/platform/] Your CPU supports instructions that this TensorFlow binary was not compiled to use: AVX2 FMA
Loaded model from disk
dl_model_fp prediction made
/home/benjamin/anaconda3/envs/test-env/lib/python2.7/site-packages/sklearn/preprocessing/ DeprecationWarning: The truth value of an empty array is ambiguous. Returning False, but in future this will result in an error. Use `array.size > 0` to check that an array is not empty.
  if diff:
xgb_fp prediction made
Index([u'index', u'drugbank_id', u'name', u'smiles', u'pubchem_substance_id', u'drug_groups', u'prediction_class_dl_model_fp_prediction', u'prediction_class_dl_model_fp_prediction_proba', u'prediction_class_xgb_fp_prediction', u'prediction_class_xgb_fp_prediction_proba', u'Unnamed: 0', u'gpcr_name', u'first_seg', u'second_seg', u'gpcr_binding_encoded'], dtype='object')
Traceback (most recent call last):
  File "", line 308, in <module>
    merged_predictions_selcols = merged_predictions_fullcols[['drugbank_id', 'name', u'pubchem_substance_id', 'drug_groups', 'prediction_class_dl_model_fp_prediction', 'prediction_class_dl_model_fp_prediction_proba', 'prediction_class_mlp_fp_prediction', 'prediction_class_mlp_fp_prediction_proba', 'prediction_class_xgb_fp_prediction', 'prediction_class_xgb_fp_prediction_proba', 'prediction_class_rfc_fp_prediction', 'prediction_class_rfc_fp_prediction_proba', 'prediction_class_svm_fp_prediction', 'prediction_class_svm_fp_prediction_proba', 'first_seg','gpcr_binding_encoded']]
  File "/home/benjamin/anaconda3/envs/test-env/lib/python2.7/site-packages/pandas/core/", line 2133, in __getitem__
    return self._getitem_array(key)
  File "/home/benjamin/anaconda3/envs/test-env/lib/python2.7/site-packages/pandas/core/", line 2177, in _getitem_array
    indexer = self.loc._convert_to_indexer(key, axis=1)
  File "/home/benjamin/anaconda3/envs/test-env/lib/python2.7/site-packages/pandas/core/", line 1269, in _convert_to_indexer
KeyError: "[u'prediction_class_mlp_fp_prediction'\n u'prediction_class_mlp_fp_prediction_proba'\n u'prediction_class_rfc_fp_prediction'\n u'prediction_class_rfc_fp_prediction_proba'\n u'prediction_class_svm_fp_prediction'\n u'prediction_class_svm_fp_prediction_proba'] not in index"