Closed prymitive closed 11 years ago
with 128 you are starting 128 threads per worker ;) New implementation is async, so you need only one thread offload-thread = 1
So should uWSGI let users set --offload > 1? Strace log from thread that got error is here: https://gist.github.com/f86fce19cc73e99ac277
The idea is allowing more threads per worker but i have not tested it heavily with more than 4 threads
With only 1 thread I can't get this segfault, I'll try valgrind or gdb
i could have found the problem uwsgi.offload_requests_head structure is shared by all of the threads while it should be "dedicated"
can you try with latest HEAD ?
Now it segfaults almost instantly and keeps segfaulting all the time:
[pid 26818] accept(6, {sa_family=AF_INET, sin_port=htons(15179), sin_addr=inet_addr("172.16.200.53")}, [16]) = 397
[pid 26818] gettimeofday({1352724785, 178156}, NULL) = 0
[pid 26818] poll([{fd=397, events=POLLIN}], 1, 4000) = 1 ([{fd=397, revents=POLLIN}])
[pid 26818] recvmsg(397, {msg_name(0)=NULL, msg_iov(1)=[{"\0\5\2\0", 4}], msg_controllen=0, msg_flags=0}, 0) = 4
[pid 26818] poll([{fd=397, events=POLLIN}], 1, 4000) = 1 ([{fd=397, revents=POLLIN}])
[pid 26818] read(397, "\f\0QUERY_STRING\0\0\16\0REQUEST_METHOD"..., 517) = 517
[pid 26818] lstat("/hom", 0x7ffff8b8ab50) = -1 ENOENT (No such file or directory)
[pid 26818] lstat("/home", {st_mode=S_IFDIR|0755, st_size=4096, ...}) = 0
[pid 26818] lstat("/home/onion", {st_mode=S_IFDIR|0755, st_size=4096, ...}) = 0
[pid 26818] lstat("/home/onion/static", {st_mode=S_IFDIR|0755, st_size=4096, ...}) = 0
[pid 26818] lstat("/home/onion/static/js", {st_mode=S_IFDIR|0755, st_size=4096, ...}) = 0
[pid 26818] lstat("/home/onion/static/js/jquery.qtip.js", {st_mode=S_IFREG|0644, st_size=97197, ...}) = 0
[pid 26818] stat("/home/onion/static/js/jquery.qtip.js", {st_mode=S_IFREG|0644, st_size=97197, ...}) = 0
[pid 26818] writev(397, [{"HTTP/1.0", 8}, {" 200 OK\r\n", 9}], 2) = 17
[pid 26818] writev(397, [{"X-AppID: 1", 10}, {"\r\n", 2}], 2) = 12
[pid 26818] writev(397, [{"Content-Type: ", 14}, {"application/javascript", 22}, {"\r\n", 2}], 3) = 38
[pid 26818] writev(397, [{"Content-Length: ", 16}, {"97197", 5}, {"\r\n", 2}, {"Last-Modified: Wed, 19 Sep 2012 "..., 48}], 4) = 71
[pid 26818] fcntl(397, F_GETFL) = 0x2 (flags O_RDWR)
[pid 26818] fcntl(397, F_SETFL, O_RDWR|O_NONBLOCK) = 0
[pid 26818] open("/home/onion/static/js/jquery.qtip.js", O_RDONLY|O_NONBLOCK) = 398
[pid 26818] write(392, "\215\1\0\0\216\1\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"..., 96) = 96
[pid 26946] <... epoll_wait resumed> {{EPOLLIN, {u32=393, u64=393}}}, 64, 4294967295) = 1
[pid 26818] gettimeofday( <unfinished ...>
[pid 26946] read(393, <unfinished ...>
[pid 26818] <... gettimeofday resumed> {1352724785, 182344}, NULL) = 0
[pid 26946] <... read resumed> "\215\1\0\0\216\1\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"..., 96) = 96
[pid 26818] open("/proc/self/stat", O_RDONLY <unfinished ...>
[pid 26946] epoll_ctl(395, EPOLL_CTL_ADD, 397, {EPOLLOUT, {u32=397, u64=397}} <unfinished ...>
[pid 26818] <... open resumed> ) = 399
[pid 26946] <... epoll_ctl resumed> ) = 0
[pid 26818] fstat(399, <unfinished ...>
[pid 26946] epoll_wait(395, <unfinished ...>
[pid 26818] <... fstat resumed> {st_mode=S_IFREG|0444, st_size=0, ...}) = 0
[pid 26946] <... epoll_wait resumed> {{EPOLLOUT, {u32=397, u64=397}}}, 64, 4294967295) = 1
[pid 26818] mmap(NULL, 4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0 <unfinished ...>
[pid 26946] sendfile(397, 398, [0], 131072 <unfinished ...>
[pid 26818] <... mmap resumed> ) = 0x7f41d3c48000
[pid 26946] <... sendfile resumed> ) = 65160
[pid 26946] close(397 <unfinished ...>
[pid 26818] read(399, <unfinished ...>
[pid 26946] <... close resumed> ) = 0
[pid 26946] close(398 <unfinished ...>
[pid 26818] <... read resumed> "2682 (uwsgi) R 1 0 0 0 -1 420256"..., 1024) = 237
[pid 26946] <... close resumed> ) = 0
[pid 26818] close(399 <unfinished ...>
[pid 26946] epoll_wait(395, <unfinished ...>
[pid 26818] <... close resumed> ) = 0
[pid 26818] munmap(0x7f41d3c48000, 4096) = 0
[pid 26818] epoll_wait(396, {{EPOLLIN, {u32=6, u64=6}}}, 1, 4294967295) = 1
[pid 26818] accept(6, {sa_family=AF_INET, sin_port=htons(15180), sin_addr=inet_addr("172.16.200.53")}, [16]) = 397
[pid 26818] gettimeofday({1352724785, 185827}, NULL) = 0
[pid 26818] poll([{fd=397, events=POLLIN}], 1, 4000) = 1 ([{fd=397, revents=POLLIN}])
[pid 26818] recvmsg(397, {msg_name(0)=NULL, msg_iov(1)=[{"\0\5\2\0", 4}], msg_controllen=0, msg_flags=0}, 0) = 4
[pid 26818] poll([{fd=397, events=POLLIN}], 1, 4000) = 1 ([{fd=397, revents=POLLIN}])
[pid 26818] read(397, "\f\0QUERY_STRING\0\0\16\0REQUEST_METHOD"..., 517) = 517
[pid 26818] lstat("/hom", 0x7ffff8b8ab50) = -1 ENOENT (No such file or directory)
[pid 26818] lstat("/home", {st_mode=S_IFDIR|0755, st_size=4096, ...}) = 0
[pid 26818] lstat("/home/onion", {st_mode=S_IFDIR|0755, st_size=4096, ...}) = 0
[pid 26818] lstat("/home/onion/static", {st_mode=S_IFDIR|0755, st_size=4096, ...}) = 0
[pid 26818] lstat("/home/onion/static/js", {st_mode=S_IFDIR|0755, st_size=4096, ...}) = 0
[pid 26818] lstat("/home/onion/static/js/jquery.qtip.js", {st_mode=S_IFREG|0644, st_size=97197, ...}) = 0
[pid 26818] stat("/home/onion/static/js/jquery.qtip.js", {st_mode=S_IFREG|0644, st_size=97197, ...}) = 0
[pid 26818] writev(397, [{"HTTP/1.0", 8}, {" 200 OK\r\n", 9}], 2) = 17
[pid 26818] writev(397, [{"X-AppID: 1", 10}, {"\r\n", 2}], 2) = 12
[pid 26818] writev(397, [{"Content-Type: ", 14}, {"application/javascript", 22}, {"\r\n", 2}], 3) = 38
[pid 26818] writev(397, [{"Content-Length: ", 16}, {"97197", 5}, {"\r\n", 2}, {"Last-Modified: Wed, 19 Sep 2012 "..., 48}], 4) = 71
[pid 26818] fcntl(397, F_GETFL) = 0x2 (flags O_RDWR)
[pid 26818] fcntl(397, F_SETFL, O_RDWR|O_NONBLOCK) = 0
[pid 26818] open("/home/onion/static/js/jquery.qtip.js", O_RDONLY|O_NONBLOCK) = 398
[pid 26818] write(7, "\215\1\0\0\216\1\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"..., 96) = 96
[pid 26819] <... epoll_wait resumed> {{EPOLLIN, {u32=9, u64=9}}}, 64, 4294967295) = 1
[pid 26818] gettimeofday( <unfinished ...>
[pid 26819] read(9, <unfinished ...>
[pid 26818] <... gettimeofday resumed> {1352724785, 188899}, NULL) = 0
[pid 26819] <... read resumed> "\215\1\0\0\216\1\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"..., 96) = 96
[pid 26818] open("/proc/self/stat", O_RDONLY <unfinished ...>
[pid 26819] epoll_ctl(18, EPOLL_CTL_ADD, 397, {EPOLLOUT, {u32=397, u64=397}} <unfinished ...>
[pid 26818] <... open resumed> ) = 399
[pid 26819] <... epoll_ctl resumed> ) = 0
[pid 26818] fstat(399, {st_mode=S_IFREG|0444, st_size=0, ...}) = 0
[pid 26819] --- SIGSEGV (Segmentation fault) @ 0 (0) ---
Process 26819 detached
[pid 26818] mmap(NULL, 4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0 <unfinished ...>
[pid 26944] +++ killed by SIGSEGV +++
[pid 26943] +++ killed by SIGSEGV +++
[pid 26942] +++ killed by SIGSEGV +++
[pid 26946] +++ killed by SIGSEGV +++
[pid 26945] +++ killed by SIGSEGV +++
[pid 26941] +++ killed by SIGSEGV +++
[pid 26940] +++ killed by SIGSEGV +++
BTW:
[pid 26818] lstat("/hom", 0x7ffff8b8ab50) = -1 ENOENT (No such file or directory)
[pid 26818] lstat("/home", {st_mode=S_IFDIR|0755, st_size=4096, ...}) = 0
Why is it trying to stat /hom? Maybe off-by-one error somewhere?
ok, fixed the segfault now i will check the /hom problem.
Can you paste the config you are using ?
[uwsgi]
if-exists = /sudoku/onion-2517/home/onion/.sudoku/uwsgi.ini
ini = /sudoku/onion-2517/home/onion/.sudoku/uwsgi.ini
endif =
autoload = true
plugins-dir = /usr/lib/uwsgi/plugins
socket = 172.16.200.55:3001
stats = :4001
add-header = X-AppID: 1
pidfile = /var/run/emperor-onion.pid
uid = www-data
gid = www-data
master = true
vacuum = true
no-orphans = true
reload-mercy = 300
idle = 600
post-buffering = 4096
memory-report = true
auto-procname = true
procname-prefix-spaced = [onion@2517]
route = ^/onion/(.+) redirect:/$1
processes = 1
max-requests = 0
reload-on-rss = 128
harakiri = 90
harakiri-verbose = true
pcre-jit = true
offload-threads = 128
namespace = /sudoku/onion-2517:sudoku-dev-backend1.domain.com
chdir = /home/onion/
cgroup = /cgroup/sudoku.onion.1
cgroup-opt = memory.limit_in_bytes=268435456
cgroup-opt = memory.memsw.limit_in_bytes=268435456
cgroup-opt = cpu.shares=1024
ksm = 20
if-exists = /sudoku/onion-2517/usr/local/bin/setenv.sh
exec-as-root = /usr/local/bin/setenv.sh
endif =
if-exists = /sudoku/onion-2517/etc/dropbear/banner
exec-as-root = chown -R www-data /etc/dropbear
namespace-keep-mount = /dev/pts
smart-attach-daemon = /tmp/dropbear.pid /usr/sbin/dropbear -b /etc/dropbear/banner -w -s -j -k -p 5001 -E -F -I 300 -P /tmp/dropbear.pid
endif =
static-map = /static=/mfs
namespace-keep-mount = /mnt/mfs/1:/mfs
env = BACKEND_NUM=1
env = SUDOKU_APP_ID=1
env = LANG=pl_PL.UTF-8
env = VARNISH_SERVERS=172.16.200.53
env = DJANGO_SETTINGS_MODULE=onion.settings
plugins = python
module = django.core.handlers.wsgi:WSGIHandler()
pythonpath = /home
static-map = /static=/home/onion/static
static-index = index.html
unsubscribe-on-graceful-reload = true
subscribe-freq = 3
subscribe-to = 172.16.200.53:2626:app1.domain.com
logger = rsyslog:127.0.0.1:514,onion
threaded-logger = true
log-x-forwarded-for = true
log-slow = 1000
log-4xx = true
log-5xx = true
disable-logging = true
carbon = 172.16.200.72:2003
carbon-id = onion
carbon-no-workers = true
With latest commit I can't get segfault anymore, seems fixed
even the wrong stat() should be fixed now, i will wait for your confirmation before backporting to 1.4
It's already compiling ;)
stat is fine now, and offload was fixed with previous commit but I'll leave ab run 10.000.000 requests, if it passes it than it's fixed
Looks fine, I can't get it to segfault anymore
With 1.4 final I've notice a lot of offload related stuff, so I wanted to give it a try, sadly I'm getting segfaults:
This happens with
offload-threads = 128
and ab run using:ab -r -c 100 -n 10000000 http://localhost:8080/static/js/jquery.qtip.js
It doesn't happen instantly, I need to wait few minutes to trigger it, I will try to narrow it down.