trampgeek / jobe

jobe is a server that runs small programming jobs in a variety of programming languages
MIT License
108 stars 78 forks source link

Matlab is crashing (segmentation violation detected) #49

Closed martin-vitek closed 3 years ago

martin-vitek commented 3 years ago

Hello, I'm trying to run questions for Matlab. The question type is python3 and the template is as follows:

""" The template for a question type that runs a student-submitted
    matlab function.

    The test code and the student's answer are written, in that order, to
    student_code.m in the current working directory. That script is then
    run with ``matlab -batch``. On success MATLAB's captured standard
    output is printed; on a non-zero exit status a failure message is
    written to stderr instead.
"""

import subprocess, sys

# Write the test and student code to a file student_code.m
# (test code first, then the student answer; print() terminates each part
# with a newline).
# The double-brace placeholders below are filled in by the question's
# template engine before this script is run.
# NOTE(review): the placeholders are inserted as-is into ordinary (non-raw)
# triple-quoted literals, so backslashes or triple quotes in the substituted
# text would presumably be reinterpreted by Python or break the literal --
# confirm whether an escaping filter should be applied here.
test_code = """{{ TEST.testcode }}

"""

student_answer = """{{ STUDENT_ANSWER }}"""
with open("student_code.m", "w") as src:
    print(test_code, file=src)
    print(student_answer, file=src)

# Run the code. "student_code" is the script name without its .m extension.
# Since this is a per-test template, stdin is presumably already set up for
# the stdin text specified in the test case (per the original author's note;
# not verifiable from this script alone), so MATLAB is launched directly.
# check_output captures only the child's stdout; anything MATLAB writes to
# stderr is not captured here.
try:
    output = subprocess.check_output(["matlab", "-batch", "student_code"], universal_newlines=True)
    print(output)
except subprocess.CalledProcessError as e:
    # Raised for any non-zero exit status. returncode is negative (-N) when
    # the child was terminated by signal N, so negating it yields the signal
    # number in that case.
    # NOTE(review): for an ordinary non-zero exit code this prints a negative
    # number labelled as a signal -- confirm that is intended.
    print("Task failed with signal", -e.returncode, file=sys.stderr)
    print("** Further testing aborted **", file=sys.stderr)

But Matlab is crashing when running the question with the following error (I added strace for debugging):

***Error***
execve("/usr/local/bin/matlab", ["matlab", "-batch", "student_code"], 0x7fff2d0d3cc0 /* 5 vars */) = 0
brk(NULL)                               = 0x55d0eee27000
arch_prctl(0x3001 /* ARCH_??? */, 0x7ffefdb9f050) = -1 EINVAL (Invalid argument)
access("/etc/ld.so.preload", R_OK)      = -1 ENOENT (No such file or directory)
openat(AT_FDCWD, "/etc/ld.so.cache", O_RDONLY|O_CLOEXEC) = 3
fstat(3, {st_mode=S_IFREG|0644, st_size=36065, ...}) = 0
mmap(NULL, 36065, PROT_READ, MAP_PRIVATE, 3, 0) = 0x7f2da6e4f000
close(3)                                = 0
openat(AT_FDCWD, "/lib64/libtinfo.so.6", O_RDONLY|O_CLOEXEC) = 3
read(3, "\177ELF\2\1\1\0\0\0\0\0\0\0\0\0\3\0>\0\1\0\0\0p\364\0\0\0\0\0\0"..., 832) = 832
fstat(3, {st_mode=S_IFREG|0755, st_size=191480, ...}) = 0
mmap(NULL, 8192, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0x7f2da6e4d000
mmap(NULL, 190832, PROT_READ, MAP_PRIVATE|MAP_DENYWRITE, 3, 0) = 0x7f2da6e1e000
mmap(0x7f2da6e2c000, 57344, PROT_READ|PROT_EXEC, MAP_PRIVATE|MAP_FIXED|MAP_DENYWRITE, 3, 0xe000) = 0x7f2da6e2c000
mmap(0x7f2da6e3a000, 57344, PROT_READ, MAP_PRIVATE|MAP_FIXED|MAP_DENYWRITE, 3, 0x1c000) = 0x7f2da6e3a000
mmap(0x7f2da6e48000, 20480, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_FIXED|MAP_DENYWRITE, 3, 0x29000) = 0x7f2da6e48000
close(3)                                = 0
openat(AT_FDCWD, "/lib64/libdl.so.2", O_RDONLY|O_CLOEXEC) = 3
read(3, "\177ELF\2\1\1\0\0\0\0\0\0\0\0\0\3\0>\0\1\0\0\0p\"\0\0\0\0\0\0"..., 832) = 832
fstat(3, {st_mode=S_IFREG|0755, st_size=37240, ...}) = 0
mmap(NULL, 24688, PROT_READ, MAP_PRIVATE|MAP_DENYWRITE, 3, 0) = 0x7f2da6e17000
mmap(0x7f2da6e19000, 8192, PROT_READ|PROT_EXEC, MAP_PRIVATE|MAP_FIXED|MAP_DENYWRITE, 3, 0x2000) = 0x7f2da6e19000
mmap(0x7f2da6e1b000, 4096, PROT_READ, MAP_PRIVATE|MAP_FIXED|MAP_DENYWRITE, 3, 0x4000) = 0x7f2da6e1b000
mmap(0x7f2da6e1c000, 4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_FIXED|MAP_DENYWRITE, 3, 0x4000) = 0x7f2da6e1c000
mmap(0x7f2da6e1d000, 112, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_FIXED|MAP_ANONYMOUS, -1, 0) = 0x7f2da6e1d000
close(3)                                = 0
openat(AT_FDCWD, "/lib64/libc.so.6", O_RDONLY|O_CLOEXEC) = 3
read(3, "\177ELF\2\1\1\3\0\0\0\0\0\0\0\0\3\0>\0\1\0\0\0 \203\2\0\0\0\0\0"..., 832) = 832
pread64(3, "\6\0\0\0\4\0\0\0@\0\0\0\0\0\0\0@\0\0\0\0\0\0\0@\0\0\0\0\0\0\0"..., 784, 64) = 784
pread64(3, "\4\0\0\0\20\0\0\0\5\0\0\0GNU\0\2\0\0\300\4\0\0\0\3\0\0\0\0\0\0\0", 32, 848) = 32
pread64(3, "\4\0\0\0\24\0\0\0\3\0\0\0GNU\0J\177\263t\t\177\271'\373\223\323^\371\213\250\222"..., 68, 880) = 68
fstat(3, {st_mode=S_IFREG|0755, st_size=3222048, ...}) = 0
pread64(3, "\6\0\0\0\4\0\0\0@\0\0\0\0\0\0\0@\0\0\0\0\0\0\0@\0\0\0\0\0\0\0"..., 784, 64) = 784
mmap(NULL, 1876640, PROT_READ, MAP_PRIVATE|MAP_DENYWRITE, 3, 0) = 0x7f2da6c4c000
mprotect(0x7f2da6c72000, 1683456, PROT_NONE) = 0
mmap(0x7f2da6c72000, 1372160, PROT_READ|PROT_EXEC, MAP_PRIVATE|MAP_FIXED|MAP_DENYWRITE, 3, 0x26000) = 0x7f2da6c72000
mmap(0x7f2da6dc1000, 307200, PROT_READ, MAP_PRIVATE|MAP_FIXED|MAP_DENYWRITE, 3, 0x175000) = 0x7f2da6dc1000
mmap(0x7f2da6e0d000, 24576, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_FIXED|MAP_DENYWRITE, 3, 0x1c0000) = 0x7f2da6e0d000
mmap(0x7f2da6e13000, 12960, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_FIXED|MAP_ANONYMOUS, -1, 0) = 0x7f2da6e13000
close(3)                                = 0
mmap(NULL, 12288, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0x7f2da6c49000
arch_prctl(ARCH_SET_FS, 0x7f2da6c49740) = 0
mprotect(0x7f2da6e0d000, 12288, PROT_READ) = 0
mprotect(0x7f2da6e1c000, 4096, PROT_READ) = 0
mprotect(0x7f2da6e48000, 16384, PROT_READ) = 0
mprotect(0x55d0ee6b0000, 16384, PROT_READ) = 0
mprotect(0x7f2da6e83000, 4096, PROT_READ) = 0
munmap(0x7f2da6e4f000, 36065)           = 0
openat(AT_FDCWD, "/dev/tty", O_RDWR|O_NONBLOCK) = -1 ENXIO (No such device or address)
ioctl(0, TCGETS, 0x7ffefdb9ec90)        = -1 ENOTTY (Inappropriate ioctl for device)
brk(NULL)                               = 0x55d0eee27000
brk(0x5 ...snip... FDIR|0755, st_size=4096, ...}) = 0
rt_sigprocmask(SIG_BLOCK, ~[], [PIPE CHLD TSTP WINCH], 8) = 0
clone(child_stack=0x7f6e44e53d70, flags=CLONE_VM|CLONE_FS|CLONE_FILES|CLONE_SIGHAND|CLONE_THREAD|CLONE_SYSVSEM|CLONE_SETTLS|CLONE_PARENT_SETTID|CLONE_CHILD_CLEARTID, parent_tid=[33873], tls=0x7f6e44e54640, child_tidptr=0x7f6e44e54910) = 33873
rt_sigprocmask(SIG_SETMASK, [PIPE CHLD TSTP WINCH], NULL, 8) = 0
futex(0x1db47f0, FUTEX_WAKE_PRIVATE, 1) = 1
futex(0x1db4868, FUTEX_WAIT_PRIVATE, 0, NULL
--------------------------------------------------------------------------------
          Segmentation violation detected at 2021-03-26 16:14:13 +0100
--------------------------------------------------------------------------------

Configuration:
  Crash Decoding           : Disabled - No sandbox or build area path
  Crash Mode               : continue (default)
  Default Encoding         : UTF-8
  GNU C Library            : 2.32 stable
  MATLAB Architecture      : glnxa64
  MATLAB Root              : /opt/Matlab/R2020b
  MATLAB Version           : 9.9.0.1467703 (R2020b)
  Operating System         : Linux 5.10.23-200.fc33.x86_64 #1 SMP Thu Mar 11 22:18:30 UTC 2021 x86_64
  Process ID               : 33771
  Processor ID             : x86 Family 6 Model 86 Stepping 3, GenuineIntel
  Session Key              : c2fd9afe-4421-466c-872f-a63646d54ad4
  Static TLS mitigation    : Disabled: Unnecessary

Fault Count: 1

Abnormal termination:
Segmentation violation

Current Thread: 'MCR 0 interpret' id 140111152932416

Register State (from fault):
  RAX = 0000000000000000  RBX = 00007f6e2b7fd120
  RCX = 00007f6e4d0701d7  RDX = 000000007fffffff
  RSP = 00007f6e2b7fd018  RBP = 00007f6e2b7fd080
  RSI = 0000000000000081  RDI = 0000000000000000

   R8 = 0000000000000000   R9 = 0000000000000001
  R10 = 0000000000000000  R11 = 0000000000000246
  R12 = 00007f6e2b7fd140  R13 = 00007f6e2b7fd100
  R14 = 00007f6e2b7fd140  R15 = 00007f6e3e7802a1

  RIP = 00007f6e49602050  EFL = 0000000000010246

   CS = 0033   FS = 0000   GS = 0000

Stack Trace (from fault):
[  0] 0x00007f6e49602050 /opt/Matlab/R2020b/bin/glnxa64/libmwsettingscore.so+01814608 _ZNK8settings4core8Settings28isConnectedToTheSettingsTreeEv+00000000
[  1] 0x00007f6e3e6bc276         /opt/Matlab/R2020b/bin/glnxa64/libmwmcr.so+00680566
[  2] 0x00007f6e3e6fdea5         /opt/Matlab/R2020b/bin/glnxa64/libmwmcr.so+00949925
[  3] 0x00007f6e3e711b55         /opt/Matlab/R2020b/bin/glnxa64/libmwmcr.so+01030997
[  4] 0x00007f6e3e6e8f9a         /opt/Matlab/R2020b/bin/glnxa64/libmwmcr.so+00864154 _ZNSt17_Function_handlerIFSt10unique_ptrINSt13__future_base12_Result_baseENS2_8_DeleterEEvENS1_12_Task_setterIS0_INS1_7_ResultIvEES3_EZNS1_11_Task_stateISt8functionIFvvEESaIiESC_E6_M_runEvEUlvE_vEEE9_M_invokeERKSt9_Any_data+00000042
[  5] 0x00007f6e4d3215ff    /opt/Matlab/R2020b/bin/glnxa64/libmwservices.so+02717183 _ZNSt13__future_base13_State_baseV29_M_do_setEPSt8functionIFSt10unique_ptrINS_12_Result_baseENS3_8_DeleterEEvEEPb+00000031
[  6] 0x00007f6e4d0701af                             /lib64/libpthread.so.0+00070063
[  7] 0x00007f6e4b128306      /opt/Matlab/R2020b/bin/glnxa64/libmwmlutil.so+08098566 _ZSt9call_onceIMNSt13__future_base13_State_baseV2EFvPSt8functionIFSt10unique_ptrINS0_12_Result_baseENS4_8_DeleterEEvEEPbEJPS1_S9_SA_EEvRSt9once_flagOT_DpOT0_+00000102
[  8] 0x00007f6e3e6e8484         /opt/Matlab/R2020b/bin/glnxa64/libmwmcr.so+00861316
[  9] 0x00007f6e3e6e8727         /opt/Matlab/R2020b/bin/glnxa64/libmwmcr.so+00861991
[ 10] 0x00007f6e4c25f482 /opt/Matlab/R2020b/bin/glnxa64/libmwboost_thread.so.1.70.0+00062594
[ 11] 0x00007f6e4d0683f9                             /lib64/libpthread.so.0+00037881
[ 12] 0x00007f6e4c9f8b53                                   /lib64/libc.so.6+01055571 clone+00000067

** This crash report has been saved to disk as /tmp/matlab_crash_dump.33771-1 **

MATLAB is exiting because of fatal error
) = ?
+++ killed by SIGKILL +++
Task failed with signal 9
** Further testing aborted **

I'm guessing that there is some problem with the sandboxing, because I can run the subprocess.check_output(["matlab", "-batch", "student_code"], universal_newlines=True) command interactively as a normal user without any problems.

Do you have some idea how to solve this?

Thank you

martin-vitek commented 3 years ago

Maybe there is a problem with opening /dev/tty: Matlab opens it for both reading and writing, and this call fails (it doesn't fail when run as a normal user).

martin-vitek commented 3 years ago

OK, so /dev/tty isn't the problem. Matlab doesn't crash with the -nojvm parameter, so this is solved for me until I need something that requires the JVM.

trampgeek commented 3 years ago

Good to know it's working. When we ran MATLAB in Jobe the command we used was

/usr/local/bin/matlab_exec_cli -nodisplay -nojvm -nosplash -r exit

AFAIK the jvm is relevant only to graphical output. Anyway, it never caused us any problems.

Is it possible you were getting a memory error before? Setting the memory limit to 0 turns off memory limits which is often a good first debugging step. Another common problem is with the number of processes, which is controlled by the numprocs sandbox parameter. The JVM is very greedy with both those resources.

Would you mind posting questions of this sort to one of the forums on coderunner.org.nz in future please? That's where most such questions are answered.