Closed xin053 closed 3 years ago
是不是分配的内存不足?把 socket_mem = 2048,2048 改成 socket_mem = 10240,2048 试试呢?另外可以生成 coredump 文件看一下。
设置成 socket_mem = 10240,2048
会报错:
[APP] Initializing CPU core map ...
[APP] CPU core mask = 0x00000000000000000000000000001eff
[APP] Initializing EAL ...
EAL: Detected 32 lcore(s)
EAL: No free hugepages reported in hugepages-1048576kB
EAL: Probing VFIO support...
EAL: Not enough memory available on socket 0! Requested: 10240MB, available: 2048MB
PANIC in rte_eal_init():
Cannot init memory
所以使用的是 socket_mem = 2048,2048。
coredump 文件内容如下:
[Thread debugging using libthread_db enabled]
Using host libthread_db library "/lib64/libthread_db.so.1".
Core was generated by `./qnsm-inspect -f qnsm_inspect.cfg -c . -p 1'.
Program terminated with signal 11, Segmentation fault.
#0 0x0000000000557e05 in qnsm_crm_msg_recv ()
Missing separate debuginfos, use: debuginfo-install cyrus-sasl-lib-2.1.26-23.el7.x86_64 file-libs-5.11-37.el7.x86_64 glibc-2.17-323.el7_9.x86_64 jansson-2.10-1.el7.x86_64 keyutils-libs-1.5.8-3.el7.x86_64 krb5-libs-1.15.1-34.el7.x86_64 libcap-ng-0.7.5-4.el7.x86_64 libcom_err-1.42.9-13.el7.x86_64 libpcap-1.5.3-12.el7.x86_64 librdkafka-0.11.5-1.el7.x86_64 libselinux-2.5-14.1.el7.x86_64 libxml2-2.9.1-6.el7.5.x86_64 libyaml-0.1.4-11.el7_0.x86_64 lz4-1.7.5-2.el7.x86_64 nspr-4.25.0-2.el7_9.x86_64 nss-3.53.1-3.el7_9.x86_64 nss-softokn-freebl-3.53.1-6.el7_9.x86_64 nss-util-3.53.1-1.el7_9.x86_64 openssl-libs-1.0.2k-16.el7.x86_64 pcre-8.32-17.el7.x86_64 xz-libs-5.2.2-1.el7.x86_64 zlib-1.2.7-19.el7_9.x86_64
(gdb) bt
#0 0x0000000000557e05 in qnsm_crm_msg_recv ()
#1 0x000000000056dca9 in qnsm_crm_msg_req_handle ()
#2 0x00000000004e9d07 in eal_alarm_callback ()
#3 0x00000000004e869d in eal_intr_thread_main ()
#4 0x00007f76f0687ea5 in start_thread () from /lib64/libpthread.so.0
#5 0x00007f76efb149fd in clone () from /lib64/libc.so.6
设置成 socket_mem = 10240,2048 的同时,需要将 socket 0 对应的 hugepage 数量(nr_hugepages)调整到相应的大小,再试一下呢?
执行了
echo 20480 > /sys/devices/system/node/node0/hugepages/hugepages-2048kB/nr_hugepages
echo 1024 > /sys/devices/system/node/node1/hugepages/hugepages-2048kB/nr_hugepages
并改成 socket_mem = 40960,2048;
以及执行了:
echo 5120 > /sys/devices/system/node/node0/hugepages/hugepages-2048kB/nr_hugepages
echo 1024 > /sys/devices/system/node/node1/hugepages/hugepages-2048kB/nr_hugepages
并改成 socket_mem = 10240,2048。
两种配置都试了一下,还是报错 Segmentation fault。
测试物理机,cpu 32核,内存 128G
SIP_IN_AGG 和 EDGE 都使用了 s0c6,而目前不支持多个组件实例绑定同一个 core。给它们分配不同的 core 再试试呢?
使用了不同的核,还是相同报错
; EAL settings. NOTE(review): the key names mirror DPDK EAL start-up options and are
; presumably forwarded to rte_eal_init() -- confirm against the application's config parser.
[EAL]
; NOTE(review): presumably the EAL log level; 8 is DPDK's most verbose (debug) level -- confirm.
log_level = 8
; NOTE(review): presumably the EAL -n option (number of memory channels) -- confirm.
n = 4
; Hugepage memory in MB requested per NUMA socket, in socket order (socket 0, socket 1).
; Each node must have at least this much hugepage memory reserved, otherwise EAL
; initialisation panics with "Not enough memory available on socket N".
; 40960 MB on socket 0 equals 20480 hugepages of 2048 kB.
socket_mem = 40960,2048
; NOTE(review): presumably the EAL --master-lcore option -- confirm.
master_lcore = 0
; conf_file names a Suricata-style YAML file (suricata.yaml).
; NOTE(review): the path is relative, so it presumably resolves against the process
; working directory; presumably consumed by the DETECT pipelines -- confirm both.
[IDPS]
conf_file = ./suricata.yaml
; mbuf mempool configuration
; private_size: size of the per-mbuf private area
[MEMPOOL0]
; NOTE(review): buffer_size is presumably bytes per mbuf, pool_size the number of mbufs
; (131072 = 2^17) and cache_size the per-lcore cache size -- confirm against the parser.
buffer_size = 2304
pool_size = 131072
cache_size = 256
; Despite its name, cpu carries a socket id (per the trailing note on the line).
cpu = 0;socket_id
; 64 is meant to equal sizeof(QNSM_PACKET_INFO); update it if that struct changes.
private_size = 64 ;sizeof(QNSM_PACKET_INFO)
; Second mempool, on socket 1 (original label: "for dump").
; In this file it is referenced by SWQ5-SWQ8, the queues read by the DETECT pipelines.
; NOTE(review): the "for dump" label may be stale -- the DUMP pipeline is commented out.
[MEMPOOL1]
buffer_size = 2304
pool_size = 131072
cache_size = 256
; Despite its name, cpu carries a socket id (per the trailing note on the line).
cpu = 1;socket_id
; 64 is meant to equal sizeof(QNSM_PACKET_INFO); update it if that struct changes.
private_size = 64 ;sizeof(QNSM_PACKET_INFO)
; link (port) configuration
[LINK0]
; RSS queue indexes. Each index has a matching [RXQ0.n] section, and each of those
; queues is read by exactly one SESSM pipeline.
rss_qs = 0 1 2 3
; Protocols included in the RSS hash for IPv4 and IPv6 traffic.
rss_proto_ipv4 = TCP UDP
rss_proto_ipv6 = TCP TCP_EX UDP UDP_EX
; NOTE(review): presumably makes both directions of a flow hash to the same queue -- confirm.
symmetrical_rss = yes
;ip_local_q = 7 reserved for future proto stack app
;arp_q = 8
; RX queue configuration: one [RXQ<link>.<queue>] section per RSS queue of LINK0.
; All four queues use identical settings.
; NOTE(review): size is presumably the ring size in descriptors and burst the number
; of packets fetched per read -- confirm against the parser.
;http://dpdk.org/doc/guides/nics/ixgbe.html
[RXQ0.0]
size = 2048
burst = 32
[RXQ0.1]
size = 2048
burst = 32
[RXQ0.2]
size = 2048
burst = 32
[RXQ0.3]
size = 2048
burst = 32
; SWQ1-SWQ4 are disabled: their only consumer in this file, the DUMP pipeline
; (PIPELINE5), is commented out as well.
; NOTE(review): these disabled sections use the key "dump" while SWQ5-SWQ8 use "dup" --
; confirm that both spellings are intended.
;[SWQ1]
;cpu = 0
;mempool = MEMPOOL0
;dump = yes
;[SWQ2]
;cpu = 0
;mempool = MEMPOOL0
;dump = yes
;[SWQ3]
;cpu = 0
;mempool = MEMPOOL0
;dump = yes
;[SWQ4]
;cpu = 0
;mempool = MEMPOOL0
;dump = yes
; SWQ5-SWQ8: software queues written by the SESSM pipelines (PIPELINE1-4) and read by
; the DETECT pipelines (PIPELINE9-12). They reference MEMPOOL1 (socket 1); the DETECT
; pipelines run on s1cN cores.
; NOTE(review): cpu here is presumably a socket id, as in the MEMPOOL sections -- confirm.
[SWQ5]
size = 2048
cpu = 1
mempool = MEMPOOL1
dup = yes
[SWQ6]
size = 2048
cpu = 1
mempool = MEMPOOL1
dup = yes
[SWQ7]
size = 2048
cpu = 1
mempool = MEMPOOL1
dup = yes
[SWQ8]
size = 2048
cpu = 1
mempool = MEMPOOL1
dup = yes
; application (pipeline) configuration
; NOTE(review): core = s<S>c<C> presumably means socket S, core C -- confirm.
; Binding several component instances to the same core is not supported, so every
; pipeline section must name a distinct core.
; NOTE(review): the trailing "lcore24" note looks stale -- the start-up log prints core
; mask 0x...1eff, in which bit 24 is not set. Confirm or remove.
[PIPELINE0]
type = MASTER
core = s0c0;lcore24
; SESSM pipelines: one per RX queue of LINK0, each on its own socket-0 core.
; Every instance writes to two software queues:
;   - SWQ5..SWQ8, read by the DETECT pipelines (PIPELINE9-12)
;   - SWQ9..SWQ12, read by the VIP_AGG pipeline (PIPELINE7)
; NOTE(review): SWQ9-SWQ12 have no explicit [SWQn] section in this file; presumably the
; parser creates them with default settings -- confirm.
; NOTE(review): the unit of timer_period is not stated here -- confirm.
[PIPELINE1]
type = SESSM
core = s0c1
pktq_in = RXQ0.0
pktq_out = SWQ5 SWQ9
timer_period = 10
[PIPELINE2]
type = SESSM
core = s0c2
pktq_in = RXQ0.1
pktq_out = SWQ6 SWQ10
timer_period = 10
[PIPELINE3]
type = SESSM
core = s0c3
pktq_in = RXQ0.2
pktq_out = SWQ7 SWQ11
timer_period = 10
[PIPELINE4]
type = SESSM
core = s0c4
pktq_in = RXQ0.3
pktq_out = SWQ8 SWQ12
timer_period = 10
; PIPELINE5 (DUMP) is disabled, together with its input queues SWQ1-SWQ4.
; NOTE(review): the disabled section names core s0c5, which SIP_IN_AGG (PIPELINE6) now
; uses; pick another core if DUMP is ever re-enabled.
;[PIPELINE5]
;type = DUMP
;core = s0c5
;pktq_in = SWQ1 SWQ2 SWQ3 SWQ4
;timer_period = 10
; SIP_IN_AGG, VIP_AGG and EDGE each run on their own core; sharing a core between
; component instances is not supported.
[PIPELINE6]
type = SIP_IN_AGG
core = s0c5
; VIP_AGG reads the second output queue (SWQ9-SWQ12) of every SESSM pipeline.
[PIPELINE7]
type = VIP_AGG
core = s0c6
pktq_in = SWQ9 SWQ10 SWQ11 SWQ12
timer_period = 10
[PIPELINE8]
type = EDGE
core = s0c7
;IPS BEGIN
; DETECT pipelines: one per SESSM output queue SWQ5..SWQ8, each on its own s1cN core.
; Those queues are configured with cpu = 1 and MEMPOOL1.
[PIPELINE9]
type = DETECT
core = s1c1
pktq_in = SWQ5
[PIPELINE10]
type = DETECT
core = s1c2
pktq_in = SWQ6
[PIPELINE11]
type = DETECT
core = s1c3
pktq_in = SWQ7
[PIPELINE12]
type = DETECT
core = s1c4
pktq_in = SWQ8
;IPS END
换了台服务器重新部署了一下,可以正常启动了。应该是之前部署时某个地方的操作有问题。
运行命令:
./qnsm-inspect -f qnsm_inspect1.cfg -c . -p 1
环境:CentOS 7.6。DPDK 16.11.2 编译时 kni 报错,所以改用了 DPDK 16.11.11 版本,编译正常。两个网卡:管理网卡(id 2)和使用了 DPDK 驱动的网卡(id 6)。
配置文件:
报错
能帮忙看看嘛,感激不尽