mc2-project / secure-xgboost

Secure collaborative training and inference for XGBoost.
https://mc2-project.github.io/secure-xgboost/
Apache License 2.0
105 stars 32 forks source link

verification failed -- mbedtls_pk_verify returned #137

Closed Coldfire93 closed 3 years ago

Coldfire93 commented 3 years ago

Hi, I ran the distributed demo. It works when sh run-local.sh 1 is executed, but sh run-local.sh 2 fails.

The output is as follows: 2021-04-01 08:09:53,248 INFO start listen on 10.1.41.239:9092 2021-04-01 08:09:53,250 DEBUG num of retry 0 2021-04-01 08:09:53,251 DEBUG num of retry 0 Creating enclave Creating enclave Remote attestation 2021-04-01 08:09:54,559 DEBUG Created SlaveEntry for node: 10.1.41.239 task NULL connected to the tracker Remote attestation 2021-04-01 08:09:54,612 DEBUG Created SlaveEntry for node: 10.1.41.239 task NULL connected to the tracker task NULL got new rank 0 2021-04-01 08:09:54,741 DEBUG Recieve start signal from 10.1.41.239; assign rank 0 task NULL got new rank 1 2021-04-01 08:09:54,969 DEBUG Recieve start signal from 10.1.41.239; assign rank 1 2021-04-01 08:09:54,969 INFO @tracker All of 2 nodes getting started Creating training matrix from encrypted file Creating training matrix from encrypted file Traceback (most recent call last): File "distr-training.py", line 35, in dtrain = xgb.DMatrix({username: HOME_DIR + "demo/data/agaricus.txt.train.enc"}) File "/usr/local/lib/python3.6/dist-packages/securexgboost-0.1-py3.6.egg/securexgboost/core.py", line 570, in init c_lengths)) File "/usr/local/lib/python3.6/dist-packages/securexgboost-0.1-py3.6.egg/securexgboost/core.py", line 203, in _check_call raise XGBoostError(py_str(_LIB.XGBGetLastError())) securexgboost.core.XGBoostError: [16172645] /home/songhongguang/myproject/secure-xgboost/enclave/../include/enclave/crypto.h:277: verification failed -- mbedtls_pk_verify returned -17280 Stack trace: [bt] (0) _ZN4dmlc15LogMessageFatalD1Ev [bt] (1) _ZL15verifySignature18mbedtls_pk_contextPhmS0_m [bt] (2) _ZN14EnclaveContext22verifyClientSignaturesEPhmPPcPS0_Pm [bt] (3) _Z18check_signed_inputRNSt3__119basic_ostringstreamIcNS_11char_traitsIcEENS_9allocatorIcEEEEPPcPPhPm [bt] (4) XGDMatrixCreateFromEncryptedFile [bt] (5) enclave_XGDMatrixCreateFromEncryptedFile [bt] (6) ecall_enclave_XGDMatrixCreateFromEncryptedFile [bt] (7) oe_handle_call_enclave_function [bt] (8) _handle_ecall

Exception in thread Thread-3: Traceback (most recent call last): File "/usr/lib/python3.6/threading.py", line 916, in _bootstrap_inner self.run() File "/usr/lib/python3.6/threading.py", line 864, in run self._target(*self._args, *self._kwargs) File "/home/songhongguang/myproject/secure-xgboost/host/dmlc-core/tracker/dmlc_tracker/local.py", line 49, in exec_cmd raise RuntimeError('Get nonzero return code=%d on %s %s' % (ret, cmd, env)) RuntimeError: Get nonzero return code=1 on ['python3', 'distr-training.py'] {'SUDO_GID': '1002', 'LESSOPEN': '| /usr/bin/lesspipe %s', 'MAIL': '/var/mail/root', 'USER': 'root', 'SHLVL': '1', 'HOME': '/root', 'OLDPWD': '/home/songhongguang/myproject/secure-xgboost/python-package', 'LC_CTYPE': 'UTF-8', 'SUDOUID': '1002', 'LOGNAME': 'root', '': '/bin/sh', 'USERNAME': 'root', 'TERM': 'xterm-256color', 'PATH': '/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin:/snap/bin', 'LANG': 'C.UTF-8', 'LS_COLORS': 'rs=0:di=01;34:ln=01;36:mh=00:pi=40;33:so=01;35:do=01;35:bd=40;33;01:cd=40;33;01:or=40;31;01:mi=00:su=37;41:sg=30;43:ca=30;41:tw=30;42:ow=34;42:st=37;44:ex=01;32:.tar=01;31:.tgz=01;31:.arc=01;31:.arj=01;31:.taz=01;31:.lha=01;31:.lz4=01;31:.lzh=01;31:.lzma=01;31:.tlz=01;31:.txz=01;31:.tzo=01;31:.t7z=01;31:.zip=01;31:.z=01;31:.Z=01;31:.dz=01;31:.gz=01;31:.lrz=01;31:.lz=01;31:.lzo=01;31:.xz=01;31:.zst=01;31:.tzst=01;31:.bz2=01;31:.bz=01;31:.tbz=01;31:.tbz2=01;31:.tz=01;31:.deb=01;31:.rpm=01;31:.jar=01;31:.war=01;31:.ear=01;31:.sar=01;31:.rar=01;31:.alz=01;31:.ace=01;31:.zoo=01;31:.cpio=01;31:.7z=01;31:.rz=01;31:.cab=01;31:.wim=01;31:.swm=01;31:.dwm=01;31:.esd=01;31:.jpg=01;35:.jpeg=01;35:.mjpg=01;35:.mjpeg=01;35:.gif=01;35:.bmp=01;35:.pbm=01;35:.pgm=01;35:.ppm=01;35:.tga=01;35:.xbm=01;35:.xpm=01;35:.tif=01;35:.tiff=01;35:.png=01;35:.svg=01;35:.svgz=01;35:.mng=01;35:.pcx=01;35:.mov=01;35:.mpg=01;35:.mpeg=01;35:.m2v=01;35:.mkv=01;35:.webm=01;35:.ogm=01;35:.mp4=01;35:.m4v=01;35:.mp4v=01;35:.vob=01;35:.qt=01;35:.nuv=01;35:.wmv=01;35:.asf
=01;35:.rm=01;35:.rmvb=01;35:.flc=01;35:.avi=01;35:.fli=01;35:.flv=01;35:.gl=01;35:.dl=01;35:.xcf=01;35:.xwd=01;35:.yuv=01;35:.cgm=01;35:.emf=01;35:.ogv=01;35:.ogx=01;35:.aac=00;36:.au=00;36:.flac=00;36:.m4a=00;36:.mid=00;36:.midi=00;36:.mka=00;36:.mp3=00;36:.mpc=00;36:.ogg=00;36:.ra=00;36:.wav=00;36:.oga=00;36:.opus=00;36:.spx=00;36:*.xspf=00;36:', 'SUDO_COMMAND': '/bin/bash', 'SHELL': '/bin/bash', 'LESSCLOSE': '/usr/bin/lesspipe %s %s', 'SUDO_USER': 'songhongguang', 'PWD': '/home/songhongguang/myproject/secure-xgboost/demo/python/distributed', 'XDG_DATA_DIRS': '/usr/local/share:/usr/share:/var/lib/snapd/desktop', 'DMLC_NUM_WORKER': '2', 'DMLC_NUM_SERVER': '0', 'DMLC_TRACKER_URI': '10.1.41.239', 'DMLC_TRACKER_PORT': '9092', 'DMLC_TASK_ID': '1', 'DMLC_ROLE': 'worker', 'DMLC_JOB_CLUSTER': 'local'}

Traceback (most recent call last): File "distr-training.py", line 35, in dtrain = xgb.DMatrix({username: HOME_DIR + "demo/data/agaricus.txt.train.enc"}) File "/usr/local/lib/python3.6/dist-packages/securexgboost-0.1-py3.6.egg/securexgboost/core.py", line 570, in init c_lengths)) File "/usr/local/lib/python3.6/dist-packages/securexgboost-0.1-py3.6.egg/securexgboost/core.py", line 203, in _check_call raise XGBoostError(py_str(_LIB.XGBGetLastError())) securexgboost.core.XGBoostError: [16172645] /home/songhongguang/myproject/secure-xgboost/enclave/src/data/data.cc:842: Encountered parser error: [16172645] /home/songhongguang/myproject/secure-xgboost/enclave/rabit/src/./ssl_socket.h:28: Stack trace: [bt] (0) _ZN5rabit5utilsL9print_errEi [bt] (1) _ZN5rabit6engine13AllreduceBase16TryAllreduceTreeEPvmmPFvPKvS2_iRKN3MPI8DatatypeEE [bt] (2) _ZN5rabit6engine15AllreduceRobust11RecoverExecEPvmiiiPKc [bt] (3) _ZN5rabit6engine15AllreduceRobust9AllreduceEPvmmPFvPKvS2_iRKN3MPI8DatatypeEEPFvS2_ES2PKciSE [bt] (4) _ZN5rabit6engine10Allreduce_EPvmmPFvPKvS1_iRKN3MPI8DatatypeEENS0_3mpi8DataTypeENSA_6OpTypeEPFvS1_ES1PKciSG [bt] (5) _ZN7xgboost4data13SimpleDMatrixC1ENSt316vectorIPNS0_11FileAdapterENS2_9allocatorIS5_EEEEfi [bt] (6) _ZN7xgboost7DMatrix4LoadERNSt316vectorIKNS1_12basic_stringIcNS1_11char_traitsIcEENS1_9allocatorIcEEEENS6_IS9_EEEEbbbPPcRS9_m [bt] (7) XGDMatrixCreateFromEncryptedFile [bt] (8) enclave_XGDMatrixCreateFromEncryptedFile

Stack trace: [bt] (0) _ZN4dmlc15LogMessageFatalD1Ev [bt] (1) _ZN7xgboost7DMatrix4LoadERNSt3__16vectorIKNS1_12basic_stringIcNS1_11char_traitsIcEENS1_9allocatorIcEEEENS6_IS9_EEEEbbbPPcRS9_m [bt] (2) XGDMatrixCreateFromEncryptedFile [bt] (3) enclave_XGDMatrixCreateFromEncryptedFile [bt] (4) ecall_enclave_XGDMatrixCreateFromEncryptedFile [bt] (5) oe_handle_call_enclave_function [bt] (6) _handle_ecall [bt] (7) oe_enter

Exception in thread Thread-2: Traceback (most recent call last): File "/usr/lib/python3.6/threading.py", line 916, in _bootstrap_inner self.run() File "/usr/lib/python3.6/threading.py", line 864, in run self._target(*self._args, *self._kwargs) File "/home/songhongguang/myproject/secure-xgboost/host/dmlc-core/tracker/dmlc_tracker/local.py", line 49, in exec_cmd raise RuntimeError('Get nonzero return code=%d on %s %s' % (ret, cmd, env)) RuntimeError: Get nonzero return code=1 on ['python3', 'distr-training.py'] {'SUDO_GID': '1002', 'LESSOPEN': '| /usr/bin/lesspipe %s', 'MAIL': '/var/mail/root', 'USER': 'root', 'SHLVL': '1', 'HOME': '/root', 'OLDPWD': '/home/songhongguang/myproject/secure-xgboost/python-package', 'LC_CTYPE': 'UTF-8', 'SUDOUID': '1002', 'LOGNAME': 'root', '': '/bin/sh', 'USERNAME': 'root', 'TERM': 'xterm-256color', 'PATH': '/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin:/snap/bin', 'LANG': 'C.UTF-8', 'LS_COLORS': 'rs=0:di=01;34:ln=01;36:mh=00:pi=40;33:so=01;35:do=01;35:bd=40;33;01:cd=40;33;01:or=40;31;01:mi=00:su=37;41:sg=30;43:ca=30;41:tw=30;42:ow=34;42:st=37;44:ex=01;32:.tar=01;31:.tgz=01;31:.arc=01;31:.arj=01;31:.taz=01;31:.lha=01;31:.lz4=01;31:.lzh=01;31:.lzma=01;31:.tlz=01;31:.txz=01;31:.tzo=01;31:.t7z=01;31:.zip=01;31:.z=01;31:.Z=01;31:.dz=01;31:.gz=01;31:.lrz=01;31:.lz=01;31:.lzo=01;31:.xz=01;31:.zst=01;31:.tzst=01;31:.bz2=01;31:.bz=01;31:.tbz=01;31:.tbz2=01;31:.tz=01;31:.deb=01;31:.rpm=01;31:.jar=01;31:.war=01;31:.ear=01;31:.sar=01;31:.rar=01;31:.alz=01;31:.ace=01;31:.zoo=01;31:.cpio=01;31:.7z=01;31:.rz=01;31:.cab=01;31:.wim=01;31:.swm=01;31:.dwm=01;31:.esd=01;31:.jpg=01;35:.jpeg=01;35:.mjpg=01;35:.mjpeg=01;35:.gif=01;35:.bmp=01;35:.pbm=01;35:.pgm=01;35:.ppm=01;35:.tga=01;35:.xbm=01;35:.xpm=01;35:.tif=01;35:.tiff=01;35:.png=01;35:.svg=01;35:.svgz=01;35:.mng=01;35:.pcx=01;35:.mov=01;35:.mpg=01;35:.mpeg=01;35:.m2v=01;35:.mkv=01;35:.webm=01;35:.ogm=01;35:.mp4=01;35:.m4v=01;35:.mp4v=01;35:.vob=01;35:.qt=01;35:.nuv=01;35:.wmv=01;35:.asf
=01;35:.rm=01;35:.rmvb=01;35:.flc=01;35:.avi=01;35:.fli=01;35:.flv=01;35:.gl=01;35:.dl=01;35:.xcf=01;35:.xwd=01;35:.yuv=01;35:.cgm=01;35:.emf=01;35:.ogv=01;35:.ogx=01;35:.aac=00;36:.au=00;36:.flac=00;36:.m4a=00;36:.mid=00;36:.midi=00;36:.mka=00;36:.mp3=00;36:.mpc=00;36:.ogg=00;36:.ra=00;36:.wav=00;36:.oga=00;36:.opus=00;36:.spx=00;36:*.xspf=00;36:', 'SUDO_COMMAND': '/bin/bash', 'SHELL': '/bin/bash', 'LESSCLOSE': '/usr/bin/lesspipe %s %s', 'SUDO_USER': 'songhongguang', 'PWD': '/home/songhongguang/myproject/secure-xgboost/demo/python/distributed', 'XDG_DATA_DIRS': '/usr/local/share:/usr/share:/var/lib/snapd/desktop', 'DMLC_NUM_WORKER': '2', 'DMLC_NUM_SERVER': '0', 'DMLC_TRACKER_URI': '10.1.41.239', 'DMLC_TRACKER_PORT': '9092', 'DMLC_TASK_ID': '0', 'DMLC_ROLE': 'worker', 'DMLC_JOB_CLUSTER': 'local'}

It seems that the cause is the enclave failing to validate the client's signature. It's strange that running with 1 worker works but running with 2 workers doesn't. Could you please tell me the possible reason? (We used simulation mode; could that be the cause of the problem?) Thank you!

chester-leung commented 3 years ago

I've submitted a PR to patch this. It should be reviewed and merged shortly.