nim-works / loony

A high throughput MPMC lock-free queue based on a paper by Giersch, Nolte et al implemented in pure Nim.
https://nim-works.github.io/loony/
MIT License
65 stars 4 forks source link

SIGSEGV: Illegal storage access. (Attempt to read from nil?) #31

Open nimelias opened 7 months ago

nimelias commented 7 months ago

Using a slightly modified version of the example code, with more consumer threads, my program crashes with a SIGSEGV error.

import std/[locks, os, atomics]
import loony

# Payload type sent through the queue. It is a `ref object`, so the consumer
# can test the result of `pop` with `isNil`.
type
  Message = ref object
    value: string

# Queue instance shared by the producer and consumer threads.
let fifo = newLoonyQueue[Message]()
# Flag the producer sets to true after it has pushed its last message.
var terminate: Atomic[bool]

proc producer() {.thread.} =
  ## Thread entry point: pushes 1,000,000 `Message` objects onto `fifo`, then
  ## sets `terminate`.
  var n: int = 0  # loop iterations since the last progress line
  for i in 1..1000000:
    n.inc()
    let msg = Message(value: "Message " & $i)
    # Print a progress line once `n` exceeds 10000, then restart the count.
    if n > 10000:
      echo "Producing ", repr(msg)
      n = 0
    fifo.push msg
    #sleep(0)
  # Signal the consumers that no further messages will be pushed.
  terminate.store(true)

proc consumer() {.thread.} =
  ## Thread entry point: pops from `fifo` in a loop and returns once a pop
  ## yields nil while `terminate` is set.
  var n: int = 0  # non-nil items popped since the last progress line
  while true:
    let item = fifo.pop
    if not item.isNil:
        n.inc()
        # Print a progress line once `n` exceeds 10000, then restart the count.
        if n > 10000:
            echo "Consumed: ", repr(item)
            n = 0
    else:
        # Nil pop: leave only if the producer has already signalled the end,
        # otherwise loop around and try again.
        if terminate.load:
            break
    #sleep(0)

# Create worker threads
var producerThread: Thread[void]
# Storage for up to 512 consumer threads; only the first THREADS slots are
# started and joined below.
var consumerthreads: array[512, Thread[void]]

# Number of consumer threads to start.
const THREADS: int = 14

# Start worker threads
createThread(producerThread, producer)
for t in 0..<THREADS:
    echo "create thread", t
    consumerthreads[t].createThread(consumer)

# Wait for the producer to finish before joining the consumers.
joinThread(producerThread)

for t in 0..<THREADS:    
    joinThread(consumerthreads[t])
disruptek commented 7 months ago

Which compiler? Which compilation options? In particular, make sure you're using --gc:arc and --define:useMalloc.

nimelias commented 7 months ago

Using Nim devel. With 14 threads the whole cycle completes, but it fails with 15 or more.

Code:

import std/[locks, os, atomics]
import loony

# Payload type sent through the queue. It is a `ref object`, so the consumer
# can test the result of `pop` with `isNil`.
type
  Message = ref object
    value: string

# Queue instance shared by the producer and consumer threads.
let fifo = newLoonyQueue[Message]()
# Flag the producer sets to true after it has pushed its last message.
var terminate: Atomic[bool]

proc producer() {.thread.} =
  ## Thread entry point: pushes 10,000,000 `Message` objects onto `fifo`, then
  ## sets `terminate`.
  var n: int = 0  # loop iterations since the last progress line
  for i in 1..10000000:
    n.inc()
    let msg = Message(value: "Message " & $i)
    # Print a progress line once `n` exceeds 10000, then restart the count.
    if n > 10000:
      echo "Producing ", repr(msg)
      n = 0
    fifo.push msg
    #sleep(0)
  # Signal the consumers that no further messages will be pushed.
  terminate.store(true)

proc consumer() {.thread.} =
  ## Thread entry point: pops from `fifo` in a loop and returns once a pop
  ## yields nil while `terminate` is set.
  var n: int = 0  # non-nil items popped since the last progress line
  while true:
    let item = fifo.pop
    if not item.isNil:
        n.inc()
        # Print a progress line once `n` exceeds 10000, then restart the count.
        if n > 10000:
            echo "Consumed: ", repr(item)
            n = 0
    else:
        # Nil pop: leave only if the producer has already signalled the end,
        # otherwise loop around and try again.
        if terminate.load:
            break
    #sleep(0)

# Create worker threads
var producerThread: Thread[void]
# Storage for up to 512 consumer threads; only the first THREADS slots are
# started and joined below.
var consumerthreads: array[512, Thread[void]]

# Number of consumer threads to start (the reporter sees the crash at 15).
const THREADS: int = 15

# Start worker threads
createThread(producerThread, producer)
for t in 0..<THREADS:
    echo "create thread", t
    consumerthreads[t].createThread(consumer)

# Wait for the producer to finish before joining the consumers.
joinThread(producerThread)

for t in 0..<THREADS:    
    joinThread(consumerthreads[t])

Output:

nim c -r --mm:arc --define:useMalloc mupmuc2
CC: mupmuc2.nim
Hint:  [Link]
Hint: mm: arc; threads: on; opt: none (DEBUG BUILD, `-d:release` generates faster code)
51828 lines; 1.222s; 71.562MiB peakmem; proj: /tank/lana/_playground/nim/froga2_threading/mupmuc2; out: /tank/lana/_playground/nim/froga2_threading/mupmuc2 [SuccessX]
Hint: /tank/lana/_playground/nim/froga2_threading/mupmuc2 [Exec]
create thread0
create thread1
create thread2
create thread3
create thread4
create thread5
create thread6
create thread7
create thread8
create thread9
create thread10
create thread11
create thread12
create thread13
create thread14
Producing Message(value: "Message 10001")
Producing Message(value: "Message 20002")
Producing Message(value: "Message 30003")
Producing Message(value: "Message 40004")
Consumed: Message(value: "Message 44369")
Producing Message(value: "Message 50005")
Producing Message(value: "Message 60006")
Producing Message(value: "Message 70007")
Producing Message(value: "Message 80008")
Producing Message(value: "Message 90009")
Consumed: Message(value: "Message 90332")
Producing Message(value: "Message 100010")
Producing Message(value: "Message 110011")
Producing Message(value: "Message 120012")
Producing Message(value: "Message 130013")
Consumed: Message(value: "Message 135967")
Consumed: Message(value: "Message 137544")
Producing Message(value: "Message 140014")
Producing Message(value: "Message 150015")
Consumed: Message(value: "Message 152439")
Consumed: Message(value: "Message 157277")
Producing Message(value: "Message 160016")
Consumed: Message(value: "Message 168461")
Producing Message(value: "Message 170017")
Consumed: Message(value: "Message 173219")
Consumed: Message(value: "Message 173726")
Producing Message(value: "Message 180018")
Consumed: Message(value: "Message 180618")
Consumed: Message(value: "Message 184718")
Consumed: Message(value: "Message 185905")
Producing Message(value: "Message 190019")
Consumed: Message(value: "Message 193865")
Consumed: Message(value: "Message 193995")
Consumed: Message(value: "Message 199588")
Producing Message(value: "Message 200020")
Consumed: Message(value: "Message 202526")
Producing Message(value: "Message 210021")
Consumed: Message(value: "Message 216181")
Consumed: Message(value: "Message 218638")
Producing Message(value: "Message 220022")
Consumed: Message(value: "Message 226190")
Producing Message(value: "Message 230023")
Producing Message(value: "Message 240024")
Producing Message(value: "Message 250025")
Producing Message(value: "Message 260026")
Consumed: Message(value: "Message 267352")
Producing Message(value: "Message 270027")
Producing Message(value: "Message 280028")
Producing Message(value: "Message 290029")
Producing Message(value: "Message 300030")
Consumed: Message(value: "Message 304656")
Consumed: Message(value: "Message 309993")
Producing Message(value: "Message 310031")
Consumed: Message(value: "Message 314088")
Consumed: Message(value: "Message 319833")
Producing Message(value: "Message 320032")
Producing Message(value: "Message 330033")
Consumed: Message(value: "Message 336312")
Traceback (most recent call last)
/home/z/.choosenim/toolchains/nim-#devel/lib/system/arc.nim(27) consumer
/home/z/.nimble/pkgs2/loony-0.1.13-8d1f520c13288c30301ae979c677a50473e05b61/loony.nim(411) pop
/home/z/.nimble/pkgs2/loony-0.1.13-8d1f520c13288c30301ae979c677a50473e05b61/loony.nim(372) popImpl
/home/z/.nimble/pkgs2/loony-0.1.13-8d1f520c13288c30301ae979c677a50473e05b61/loony/node.nim(104) fetchAddSlot
/home/z/.choosenim/toolchains/nim-#devel/lib/pure/concurrency/atomics.nim(353) fetchAdd
SIGSEGV: Illegal storage access. (Attempt to read from nil?)
cat /proc/cpuinfo 
processor   : 0
vendor_id   : GenuineIntel
cpu family  : 6
model       : 45
model name  : Intel(R) Xeon(R) CPU E5-2670 0 @ 2.60GHz
stepping    : 7
microcode   : 0x71a
cpu MHz     : 2248.206
cache size  : 20480 KB
physical id : 0
siblings    : 16
core id     : 0
cpu cores   : 8
apicid      : 0
initial apicid  : 0
fpu     : yes
fpu_exception   : yes
cpuid level : 13
wp      : yes
flags       : fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mca cmov pat pse36 clflush dts acpi mmx fxsr sse sse2 ss ht tm pbe syscall nx pdpe1gb rdtscp lm constant_tsc arch_perfmon pebs bts rep_good nopl xtopology nonstop_tsc cpuid aperfmperf pni pclmulqdq dtes64 monitor ds_cpl vmx smx est tm2 ssse3 cx16 xtpr pdcm pcid dca sse4_1 sse4_2 x2apic popcnt tsc_deadline_timer aes xsave avx lahf_lm epb pti ssbd ibrs ibpb stibp tpr_shadow flexpriority ept vpid xsaveopt dtherm ida arat pln pts vnmi md_clear flush_l1d
vmx flags   : vnmi preemption_timer invvpid ept_x_only ept_1gb flexpriority tsc_offset vtpr mtf vapic ept vpid unrestricted_guest ple
bugs        : cpu_meltdown spectre_v1 spectre_v2 spec_store_bypass l1tf mds swapgs itlb_multihit mmio_unknown
bogomips    : 5189.65
clflush size    : 64
cache_alignment : 64
address sizes   : 46 bits physical, 48 bits virtual
power management:

processor   : 1
vendor_id   : GenuineIntel
disruptek commented 7 months ago

If you want to debug this stuff, my advice is to drop the echo. That function uses locking, so it will create extra contention/serialization and confuse your efforts.

Next, install https://github.com/disruptek/balls-5.1.3 and run balls --define:danger some/test/file.nim. This will run the compiler sanitizers on your test. When they fail, it will give you the command-line with which to reproduce a sanitizing test binary.

Next, consider switching to Nimskull, which uses atomic reference counting by default. It won't fix your problem, but it will give you a chance to debug a program with fewer bugs.

Finally, use a var for the message you push() into the queue, and push it with push(move msg) because, as you can confirm with --expandArc:producer, the compiler will otherwise make a copy of the ref and send that, which can of course create a problem when the msg is later destroyed.

The specific problem you're running into isn't jumping out at me, but I might be running into the same bug myself. We're adjusting the memory ordering in loony to see if it helps. I do successfully use loony to produce some pretty insane concurrency numbers, but I've also had my share of bloodletting to get to this point.

nimelias commented 7 months ago

I have made the changes (except compiling with Nimskull) without success.

Here is the latest code. With 4 producers and 13 consumers it runs fine, but it fails with 5 producers on my machine.

Thank you for your work. It is an exciting project, but its use is beyond my abilities. I am just landing on Nim from Python and still have a lot to learn. I'll stick with locking for now.

import std/[locks, os, atomics]
import loony

# Payload type sent through the queue. It is a `ref object`, so the consumer
# can test the result of `pop` with `isNil`.
type
  Message = ref object
    value: string

# Queue instance shared by all producer and consumer threads.
let fifo = newLoonyQueue[Message]()
# Flag set to true by a producer once it has finished pushing.
var terminate: Atomic[bool]

# Number of producer / consumer threads to start.
const PRODUCER_THREADS: int = 4
const CONSUMER_THREADS: int = 13
# Number of messages each producer thread pushes.
const MESSAGE_COUNT: int = 9000000
# Number of messages between progress lines (and `sleep(0)` calls) on the
# producer and consumer side respectively.
const PRODUCER_PAUSE: int = 10000
const CONSUMER_PAUSE: int = 10000

proc producer() {.thread.} =
  ## Thread entry point: pushes MESSAGE_COUNT messages, each tagged with this
  ## thread's id, onto `fifo`; then sleeps for one second and sets `terminate`.
  ##
  ## Every message is handed to the queue with `move`, so this thread keeps no
  ## reference to an object after it has been pushed.
  var n: int = 0  # loop iterations since the last progress line
  var msg: Message
  let tid: string = $getThreadId()
  for i in 1..MESSAGE_COUNT:
    n.inc()
    msg = Message(value: "Message " & tid & " " & $i)
    # Print a progress line once `n` exceeds PRODUCER_PAUSE, then restart the
    # count and call `sleep(0)`.
    if n > PRODUCER_PAUSE:
      debugecho "Producing ", repr(msg)
      n = 0
      sleep(0)
    # Move instead of copy: without `move` a copy of the ref is pushed and
    # `msg` keeps a second reference that this thread releases later, while a
    # consumer may already own the object.
    fifo.push move(msg)
  # Pause for one second before signalling the end.
  sleep(1000)
  # NOTE(review): every producer stores `true` here when it finishes, so with
  # more than one producer the flag can be set while other producers are
  # still pushing -- confirm this is the intended shutdown behaviour.
  terminate.store(true)

proc consumer() {.thread.} =
  ## Thread entry point: pops from `fifo` in a loop and returns once a pop
  ## yields nil while `terminate` is set.
  var n: int = 0  # non-nil items popped since the last progress line
  while true:
    let item = fifo.pop
    if not item.isNil:
        n.inc()
        # Print a progress line once `n` exceeds CONSUMER_PAUSE, then restart
        # the count and call `sleep(0)`.
        if n > CONSUMER_PAUSE:
            debugecho "Consumed: ", repr(item)
            n = 0
            sleep(0)
        #elif terminate.load:
        #    debugecho "Consumed: ", repr(item)
    else:
        # Nil pop: call `sleep(0)`, then leave only if `terminate` has been
        # set; otherwise loop around and try again.
        sleep(0)
        if terminate.load:
            break
    #sleep(0)

# Create worker threads
# Storage for up to 512 threads of each kind; only the first PRODUCER_THREADS
# and CONSUMER_THREADS slots are started and joined below.
var producerThreads: array[512, Thread[void]]
var consumerthreads: array[512, Thread[void]]

# Start worker threads
for t in 0..<PRODUCER_THREADS:
    debugecho "create thread", t
    producerthreads[t].createThread(producer)
for t in 0..<CONSUMER_THREADS:
    debugecho "create thread", t
    consumerthreads[t].createThread(consumer)

# goes well
# Join only the slots that were actually started.
for t in 0..<PRODUCER_THREADS:    
    joinThread(producerthreads[t])
for t in 0..<CONSUMER_THREADS:    
    joinThread(consumerThreads[t])

# SIGSEGV: Illegal storage access. (Attempt to read from nil?)
# (Reporter's disabled alternative: joining the whole 512-element arrays,
# which include slots that were never started.)
#joinThreads(producerThreads)
#joinThreads(consumerThreads)
shayanhabibi commented 4 weeks ago
# cat /proc/cpuinfo
processor       : 0
vendor_id       : AuthenticAMD
cpu family      : 23
model           : 113
model name      : AMD Ryzen 5 3600 6-Core Processor
stepping        : 0
microcode       : 0x8701013
cpu MHz         : 3600.000
cache size      : 32768 KB
physical id     : 0
siblings        : 12
core id         : 0
cpu cores       : 12
apicid          : 0
initial apicid  : 0
fpu             : yes
fpu_exception   : yes
cpuid level     : 26
wp              : yes
flags           : fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mca cmov pat pse36 clflush mmx fxsr sse sse2 ht syscall nx mmxext fxsr_opt pdpe1gb rdtscp lm constant_tsc rep_good nopl xtopology tsc_reliable nonstop_tsc cpuid extd_apicid aperfmperf rapl pni pclmuldq monitor ssse3 fma cx16 sse4_1 sse4_2 movbe popcnt aes xsave avx f16c lahf_lm cmp_legacy svm extapic cr8_legacy abm sse4a misalignsse 3dnowprefetch osvw ibs skinit wdt tce topoext perfctr_core perfctr_nb bpext perfctr_llc mwaitx cpb cat_l3 cdp_l3 hw_pstate ssbd mba ibpb stibp fsgsbase bmi1 avx2 smep bmi2 cqm rdt_a adx smap clflushopt clwb sha_ni xsaveopt xsavec xgetbv1 xsaves cqm_llc cqm_occup_llc cqm_mbm_total cqm_mbm_local clzero irperf xsaveerptr rdpru wbnoinvd arat npt lbrv svm_lock nrip_save tsc_scale vmcb_clean flushbyasid decode_assists pausefilter pfthreshold avic v_vmsave_vmload vgif v_spec_ctrl overflow_recov succor smca sme sev sev_es
bogomips        : 7200.00
TLB size        : 3072 4K pages
clflush size    : 64
cache_alignment : 64
address sizes   : 48 bits physical, 48 bits virtual
power management: ts ttp tm hwpstate cpb eff_freq_ro

Running your previous example with modified constants (on nimskull with arc and danger)

# Constants changed for this run: 100 threads of each kind, and a progress
# line every 100 messages instead of every 10000.
const PRODUCER_THREADS: int = 100
const CONSUMER_THREADS: int = 100
const MESSAGE_COUNT: int = 9000000
const PRODUCER_PAUSE: int = 100
const CONSUMER_PAUSE: int = 100

No issues.

It is difficult to determine what the issue is on your end. If you decide to test it again, please let me know if you continue to have the issue.

nimelias commented 2 weeks ago

"(except compiling with Nimskull)" I shouldn't have reported that Loony doesn't work properly under any compiler other than Nimskull. Sorry for the false alarm.