radical-collaboration / extasy-grlsd

Repository to hold the input data and scripts for the ExTASY gromacs-lsdmap work
1 stars 1 forks source link

quota exceeded 2 #104

Closed euhruska closed 5 years ago

euhruska commented 5 years ago

With appman = AppManager(hostname='two.radical-project.org', port=33134) I got a "quota exceeded" error. Did that host run out of memory?

12501, u'errmsg': u'quota exceeded', u'op': SON([('q', {'type': 'unit', 'uid': 'unit.000097'}), ('u', {'$set': {'control': 'umgr_pending', 'resource_sandbox': 'gsisftp://bw.ncsa.illinois.edu/scratch/sciteam/hruska/radical.pilot.sandbox', 'uid': 'unit.000097', 'name': 'task.0097,None,stage.0002,None,pipeline.0000,None', 'stdout': '', 'stderr': 'run_openmm.py:243: RuntimeWarning: overflow encountered in exp\n  q = np.mean(1.0 / (1 + np.exp(BETA_CONST * (r - LAMBDA_CONST * r0))), axis=1)\n', 'description': {'kernel': '', 'stdout': '', 'post_exec': [], 'gpu_process_type': '', 'executable': 'bwpy-environ', 'cpu_thread_type': '', 'pre_exec': ['module unload PrgEnv-cray', 'module load PrgEnv-gnu', 'module unload gcc', 'module load gcc/5.3.0', 'module unload bwpy', 'module load bwpy/2.0.0-pre0', 'module load bwpy-mpi', 'module add cudatoolkit', 'export MODULEPATH="/sw/bw/bwpy/modulefiles/:${MODULEPATH}"', 'export CPATH="${BWPY_INCLUDE_PATH}"', 'export LDFLAGS="${LDFLAGS} -Wl,--rpath=${BWPY_LIBRARY_PATH}"', 'module load craype-ml-plugin-py3/1.1.0', 'export MPICH_GNI_MALLOC_FALLBACK=enable', 'export MPICH_GNI_MAX_VSHORT_MSG_SIZE=64', 'export MPICH_MAX_THREAD_SAFETY=multiple', 'export MPICH_RMA_OVER_DMAPP=1', 'export OPENMM_PLUGIN_DIR=/mnt/bwpy/single/usr/lib/plugins', 'source /projects/sciteam/bamm/hruska/vpy8/bin/activate', 'printenv > env.log', 'export OMP_NUM_THREADS=1', 'export PYEMMA_NJOBS=1', 'export tasks=md', 'export iter=30'], 'environment': {}, 'gpu_threads': 1, 'arguments': ['python', 'run_openmm.py', '--trajstride', '10000', '--Kconfig', 'settings_extasy_tica3_villin_long.wcfg', '--idxstart', '42', '--idxend', '43', '--path', '/u/sciteam/hruska/scratch/extasy_tica3_villin_long', '--iter', '30', '--md_steps', '10000000', '--save_traj', 'True', '>', 'md.log'], 'gpu_processes': 1, 'cpu_processes': 0, 'restartable': False, 'output_staging': [{'uid': 'sd.0495', 'priority': 0, 'source': 'md.log', 'flags': 64, 'action': 'Copy', 'target': 
'/u/sciteam/hruska/scratch/extasy_tica3_villin_long/md_logs/iter30_md42.log'}], 'input_staging': [{'uid': 'sd.0491', 'priority': 0, 'source': 'pilot:///run-openmm-xml3.py', 'flags': 64, 'action': 'Link', 'target': 'run_openmm.py'}, {'uid': 'sd.0492', 'priority': 0, 'source': 'pilot:///system-5.xml', 'flags': 64, 'action': 'Link', 'target': 'system-5.xml'}, {'uid': 'sd.0493', 'priority': 0, 'source': 'pilot:///integrator-5.xml', 'flags': 64, 'action': 'Link', 'target': 'integrator-5.xml'}, {'uid': 'sd.0494', 'priority': 0, 'source': 'pilot:///settings_extasy_tica3_villin_long.wcfg', 'flags': 64, 'action': 'Link', 'target': 'settings_extasy_tica3_villin_long.wcfg'}], 'cpu_threads': 0, 'cpu_process_type': '', 'pilot': '', 'name': 'task.0097,None,stage.0002,None,pipeline.0000,None', 'gpu_thread_type': '', 'stderr': '', 'cleanup': False}, 'cmd': [], 'exit_code': 0, 'stdout_file': '/mnt/c/scratch/sciteam/hruska/radical.pilot.sandbox/re.session.leonardo.rice.edu.eh22.017831.0001/pilot.0000/unit.000097/STDOUT', 'target_state': 'DONE', 'state': 'UMGR_STAGING_OUTPUT_PENDING', 'stderr_file': '/mnt/c/scratch/sciteam/hruska/radical.pilot.sandbox/re.session.leonardo.rice.edu.eh22.017831.0001/pilot.0000/unit.000097/STDERR', 'pilot': 'pilot.0000', 'slots': {'cores_per_node': 16, 'nodes': [['4734', '4734', [], [[0]]]], 'gpus_per_node': 1, 'lm_info': {}}, 'client_sandbox': '/scratch1/eh22/bluewaters/extasy-koopman', 'type': 'unit', 'pilot_sandbox': 'gsisftp://bw.ncsa.illinois.edu/scratch/sciteam/hruska/radical.pilot.sandbox/re.session.leonardo.rice.edu.eh22.017831.0001/pilot.0000/', 'unit_sandbox': 'gsisftp://bw.ncsa.illinois.edu/scratch/sciteam/hruska/radical.pilot.sandbox/re.session.leonardo.rice.edu.eh22.017831.0001/pilot.0000//unit.000097/', 'umgr': 'umgr.0000'}, '$push': {'states': 'UMGR_STAGING_OUTPUT_PENDING'}}), ('multi', True), ('upsert', False)])}], 'upserted': [], 'writeConcernErrors': [], 'nRemoved': 0, 'nInserted': 0}
Traceback (most recent call last):
  File "/scratch/sciteam/hruska/radical.pilot.sandbox/re.session.leonardo.rice.edu.eh22.017831.0001/pilot.0000/rp_install/lib/python2.7/site-packages/radical/pilot/worker/update.py", line 111, in _timed_bulk_execute
    res = self._bulk.execute()
  File "/mnt/c/scratch/sciteam/hruska/radical.pilot.sandbox/ve.ncsa.bw_aprun.0.50.12/lib/python2.7/site-packages/pymongo/bulk.py", line 587, in execute
    return self.__bulk.execute(write_concern)
  File "/mnt/c/scratch/sciteam/hruska/radical.pilot.sandbox/ve.ncsa.bw_aprun.0.50.12/lib/python2.7/site-packages/pymongo/bulk.py", line 433, in execute
    return self.execute_command(generator, write_concern)
  File "/mnt/c/scratch/sciteam/hruska/radical.pilot.sandbox/ve.ncsa.bw_aprun.0.50.12/lib/python2.7/site-packages/pymongo/bulk.py", line 300, in execute_command
    raise BulkWriteError(full_result)
BulkWriteError: batch op errors occurred
andre-merzky commented 5 years ago

This is a MongoDB quota error again. However, AFAICS that machine is not running out of space, nor are quotas enabled for MongoDB - so this is likely a different DB host then?