go-skynet / go-llama.cpp

LLama.cpp golang bindings
MIT License
686 stars 83 forks source link

cannot use full ngl offload. not sure why. error shown #259

Open hiqsociety opened 1 year ago

hiqsociety commented 1 year ago

I think this assertion is the part that fails. How can it be disabled or worked around? GGML_ASSERT: /usr/local/src/go-llama.cpp/llama.cpp/ggml-cuda.cu:6642: src0->type == GGML_TYPE_F16

.................................................................................................
llama_new_context_with_model: kv self size  =  128.00 MB
llama_new_context_with_model: compute buffer total size =   73.47 MB
llama_new_context_with_model: VRAM scratch buffer: 72.00 MB
Model loaded successfully.
GGML_ASSERT: /usr/local/src/go-llama.cpp/llama.cpp/ggml-cuda.cu:6642: src0->type == GGML_TYPE_F16
SIGABRT: abort
PC=0x7fab5ba969fc m=0 sigcode=18446744073709551610
signal arrived during cgo execution

goroutine 1 [syscall]:
runtime.cgocall(0x49c570, 0xc000063ac0)
        /usr/local/go/src/runtime/cgocall.go:157 +0x4b fp=0xc000063a98 sp=0xc000063a60 pc=0x414c6b
github.com/go-skynet/go-llama%2ecpp._Cfunc_llama_predict(0x2f56dd0, 0x1a2bfb0, 0xc000112000, 0x0)
        _cgo_gotypes.go:236 +0x4b fp=0xc000063ac0 sp=0xc000063a98 pc=0x499e6b
github.com/go-skynet/go-llama%2ecpp.(*LLama).Predict.func2(0x589198?, 0xc000063cc8?, {0xc000112000, 0x0?, 0xc000063be8?}, 0x0?)
        /usr/local/src/go-llama.cpp/llama.go:312 +0x98 fp=0xc000063b10 sp=0xc000063ac0 pc=0x49af78
github.com/go-skynet/go-llama%2ecpp.(*LLama).Predict(0xc000012018, {0x589198, 0x449}, {0xc000063f08, 0x5, 0x0?})
        /usr/local/src/go-llama.cpp/llama.go:312 +0x28f fp=0xc000063df8 sp=0xc000063b10 pc=0x49ac2f
main.main()
        /usr/local/src/go-llama.cpp/examples/main.go:56 +0x586 fp=0xc000063f40 sp=0xc000063df8 pc=0x49c0a6
runtime.main()
        /usr/local/go/src/runtime/proc.go:267 +0x2bb fp=0xc000063fe0 sp=0xc000063f40 pc=0x4456db
runtime.goexit()
        /usr/local/go/src/runtime/asm_amd64.s:1650 +0x1 fp=0xc000063fe8 sp=0xc000063fe0 pc=0x46f821

goroutine 2 [force gc (idle)]:
runtime.gopark(0x0?, 0x0?, 0x0?, 0x0?, 0x0?)
        /usr/local/go/src/runtime/proc.go:398 +0xce fp=0xc00004efa8 sp=0xc00004ef88 pc=0x445b2e
runtime.goparkunlock(...)
        /usr/local/go/src/runtime/proc.go:404
runtime.forcegchelper()
        /usr/local/go/src/runtime/proc.go:322 +0xb3 fp=0xc00004efe0 sp=0xc00004efa8 pc=0x4459b3
runtime.goexit()
        /usr/local/go/src/runtime/asm_amd64.s:1650 +0x1 fp=0xc00004efe8 sp=0xc00004efe0 pc=0x46f821
created by runtime.init.6 in goroutine 1
        /usr/local/go/src/runtime/proc.go:310 +0x1a

goroutine 3 [GC sweep wait]:
runtime.gopark(0x0?, 0x0?, 0x0?, 0x0?, 0x0?)
        /usr/local/go/src/runtime/proc.go:398 +0xce fp=0xc00004f778 sp=0xc00004f758 pc=0x445b2e
runtime.goparkunlock(...)
        /usr/local/go/src/runtime/proc.go:404
runtime.bgsweep(0x0?)
        /usr/local/go/src/runtime/mgcsweep.go:280 +0x94 fp=0xc00004f7c8 sp=0xc00004f778 pc=0x432454
runtime.gcenable.func1()
        /usr/local/go/src/runtime/mgc.go:200 +0x25 fp=0xc00004f7e0 sp=0xc00004f7c8 pc=0x4277e5
runtime.goexit()
        /usr/local/go/src/runtime/asm_amd64.s:1650 +0x1 fp=0xc00004f7e8 sp=0xc00004f7e0 pc=0x46f821
created by runtime.gcenable in goroutine 1
        /usr/local/go/src/runtime/mgc.go:200 +0x66

goroutine 4 [GC scavenge wait]:
runtime.gopark(0xc000078000?, 0x5a1490?, 0x1?, 0x0?, 0xc0000071e0?)
        /usr/local/go/src/runtime/proc.go:398 +0xce fp=0xc00004ff70 sp=0xc00004ff50 pc=0x445b2e
runtime.goparkunlock(...)
        /usr/local/go/src/runtime/proc.go:404
runtime.(*scavengerState).park(0xa30b80)
        /usr/local/go/src/runtime/mgcscavenge.go:425 +0x49 fp=0xc00004ffa0 sp=0xc00004ff70 pc=0x42fce9
runtime.bgscavenge(0x0?)
        /usr/local/go/src/runtime/mgcscavenge.go:653 +0x3c fp=0xc00004ffc8 sp=0xc00004ffa0 pc=0x43027c
runtime.gcenable.func2()
        /usr/local/go/src/runtime/mgc.go:201 +0x25 fp=0xc00004ffe0 sp=0xc00004ffc8 pc=0x427785
runtime.goexit()
        /usr/local/go/src/runtime/asm_amd64.s:1650 +0x1 fp=0xc00004ffe8 sp=0xc00004ffe0 pc=0x46f821
created by runtime.gcenable in goroutine 1
        /usr/local/go/src/runtime/mgc.go:201 +0xa5

goroutine 5 [finalizer wait]:
runtime.gopark(0x0?, 0x0?, 0x0?, 0x0?, 0x0?)
        /usr/local/go/src/runtime/proc.go:398 +0xce fp=0xc000050628 sp=0xc000050608 pc=0x445b2e
runtime.runfinq()
        /usr/local/go/src/runtime/mfinal.go:193 +0x107 fp=0xc0000507e0 sp=0xc000050628 pc=0x426867
runtime.goexit()
        /usr/local/go/src/runtime/asm_amd64.s:1650 +0x1 fp=0xc0000507e8 sp=0xc0000507e0 pc=0x46f821
created by runtime.createfing in goroutine 1
        /usr/local/go/src/runtime/mfinal.go:163 +0x3d

rax    0x0
rbx    0x7fab62cdb000
rcx    0x7fab5ba969fc
rdx    0x6
rdi    0x10bae
rsi    0x10bae
rbp    0x10bae
rsp    0x7ffc8a755ae0
r8     0x7ffc8a755bb0
r9     0x7fffffff
r10    0x8
r11    0x246
r12    0x6
r13    0x16
r14    0x7faab3e41b30
r15    0x7ffc8a755e30
rip    0x7fab5ba969fc
rflags 0x246
cs     0x33
fs     0x0
gs     0x0

mrh-chain commented 10 months ago

I am running into this exact issue as well: same cgocall.go line (157) and same sigcode. It works if I set the number of offloaded layers to slightly fewer than all the available layers.