draios / sysdig

Linux system exploration and troubleshooting tool with first class support for containers
http://www.sysdig.com/
Other
7.72k stars 726 forks source link

Issue with Sysdig when running in Kubernetes where Cilium is also installed. #1766

Open saleem-accuknox opened 3 years ago

saleem-accuknox commented 3 years ago

When I run a Sysdig chisel in Kubernetes (GKE) with Cilium also installed, I get a segmentation fault:

image (9) image (8)

When I run it without Cilium, I do not have any issues. Command used to run the Sysdig chisel: /usr/bin/sysdig -k http://127.0.0.1:8080 -c chisel config.yaml

The chisel

-- Chisel metadata
description = ""
short_description = "Monitor syscalls"
category = ""

-- Arguments accepted by the chisel.
-- Invoke as: sudo sysdig -c chisel /path/to/config.yaml
args = {
    {
        argtype = "string",
        name = "config_file",
        description = "The path to the configuration yaml file",
    },
}

-- Imports
--local inspect = require('inspect')
--local json = require ("dkjson")
local pb = require "pb"
local protoc = require "protoc"
local yaml = require('yaml')
local zmq = require "lzmq"

-- Ordered list of every field name taken from the config's field_sets;
-- filled in by on_init and iterated for each event in handle_event
local all_field_names = {}
-- Map of field name -> handle returned by chisel.request_field
local field_handles = {}
-- Map of field name -> { category, sub_field }, split on the first period with
-- the remaining periods replaced by underscores
-- (e.g. "evt.name" -> { "evt", "name" }, "k8s.pod.name" -> { "k8s", "pod_name" })
local split_fields = {}
-- List of common field names (currently disabled)
--local common_field_names = {}

-- ZMQ context and PUB socket; created in on_init, closed in on_capture_end
local context
local socket

-- Argument callback: remembers the value given for "config_file" and reports
-- whether the argument name was recognised
function on_set_arg(name, val)
    if name ~= "config_file" then
        return false
    end

    config_file = val
    return true
end

-- Capture start callback: there is nothing to prepare here, so it always
-- reports success
function on_capture_start() return true end

-- Split a field name into the { category, sub_field } pair used to build the
-- nested event table. The name is split on the first period and the remaining
-- periods become underscores, e.g. "k8s.pod.name" -> { "k8s", "pod_name" }.
-- Raises an error naming the offending entry when it contains no period.
local function split_field_name(field_name)
    local dot = string.find(field_name, ".", 1, true)
    if dot == nil then
        error("invalid field name in config (expected <category>.<field>): " .. tostring(field_name))
    end
    local category = string.sub(field_name, 1, dot - 1)
    -- gsub also returns the match count; the assignment keeps only the string
    local sub_field = string.gsub(string.sub(field_name, dot + 1), "%.", "_")
    return { category, sub_field }
end

-- Initialization callback.
-- Reads and validates the YAML config named by the config_file argument,
-- loads the protobuf schema, creates and binds the ZMQ PUB socket, requests
-- every configured field from sysdig and installs the event filter.
-- Returns true on success; raises an error (with a descriptive message) when
-- the config is incomplete, a file cannot be read, a field name is malformed,
-- or the socket cannot be created or bound.
function on_init()
    -- read configuration file
    assert(config_file, "missing required chisel argument: config_file")
    local fd = assert(io.open(config_file, "r"))
    local content = fd:read("*all")
    fd:close()

    -- parse configuration
    local config = yaml.eval(content)
    assert(type(config) == "table" and type(config.containersec) == "table",
        "config file must contain a top-level 'containersec' section")
    local settings = config.containersec

    -- make sure we have required fields in the config
    local required_keys = {
        "addr", "schema", "filter_suffix", "field_sets",
        "common_fields", "syscalls", "deployment_type",
    }
    for _, key in ipairs(required_keys) do
        assert(settings[key], "missing required config key: containersec." .. key)
    end

    -- read .proto schema file (assuming you are running from project root)
    print("Loading protobuf schema: " .. settings.schema)
    fd = assert(io.open(settings.schema, "r"))
    content = fd:read("*all")
    fd:close()

    -- load the schema
    assert(protoc:load(content))

    -- Connect to message queue; every step is checked so that a failure is
    -- reported here instead of surfacing later as an index-on-nil error
    local err
    context, err = zmq.context()
    if not context then
        error("failed to create ZMQ context: " .. tostring(err))
    end
    socket, err = context:socket(zmq.PUB)
    if not socket then
        error("failed to create ZMQ PUB socket: " .. tostring(err))
    end
    print("Binding to " .. settings.addr)
    local bound
    bound, err = socket:bind(settings.addr)
    if not bound then
        error("failed to bind to " .. tostring(settings.addr) .. ": " .. tostring(err))
    end

    -- Request every configured field once; a name that appears in more than
    -- one field set is not requested or stored twice
    for _, field_list in pairs(settings.field_sets) do
        for _, field_name in ipairs(field_list) do
            if split_fields[field_name] == nil then
                split_fields[field_name] = split_field_name(field_name)
                field_handles[field_name] = chisel.request_field(field_name)
                all_field_names[#all_field_names + 1] = field_name
            end
        end
    end

    -- Set the filter
    local filter = "evt.type in (" .. table.concat(settings.syscalls, ",") .. ") and container.id != host" .. settings.filter_suffix
    --print("Filter: " .. filter)
    chisel.set_filter(filter)

    return true
end

-- Build a nested event table from the requested fields and publish it on the
-- ZMQ socket as a two-part message: the topic "sysdig" followed by the
-- protobuf-encoded "proto.Message".
-- evt: object exposing field(handle), used to read each requested field.
-- Fields whose value is nil for this event are left out of the message.
-- Does nothing when the socket is missing or already closed; raises an error
-- when encoding fails or when either message part cannot be sent.
function handle_event(evt)
    -- Without an open socket nothing can be published, so skip the work of
    -- collecting and encoding the event
    if socket == nil or socket:closed() then
        return
    end

    -- Create the event
    local event = {}

    -- Get values for fields
    for _, field_name in ipairs(all_field_names) do
        local value = evt.field(field_handles[field_name])
        if value ~= nil then
            -- e.g. [k8s.pod.name] = { k8s, pod_name }
            local tokens = split_fields[field_name]
            local first = tokens[1]
            -- Strip brackets from field name (e.g. arg[0] -> arg0); the
            -- assignment keeps only the string result of gsub
            local second = string.gsub(tokens[2], "[%[%]]", "")
            -- Make sure the first table exists
            if event[first] == nil then
                event[first] = {}
            end
            -- Add the field value to event
            event[first][second] = value
        end
    end

    -- send event to message queue
    local data = assert(pb.encode("proto.Message", event))
    local ok, err = socket:send("sysdig", zmq.SNDMORE)
    if not ok then
        -- Do not send the payload as a stand-alone frame if the topic frame
        -- was not accepted
        error("failed to send topic frame: " .. tostring(err))
    end
    ok, err = socket:send(data)
    if not ok then
        error("failed to send event payload: " .. tostring(err))
    end
end

-- Event parsing callback: runs handle_event under pcall so that a failure on
-- one event is printed instead of propagating. Always returns true.
function on_event()
    local ok, err = pcall(handle_event, evt)

    if not ok then
        -- The error value is not guaranteed to be a string (it can be a table
        -- or nil); convert it so the concatenation itself cannot raise
        print("handle_event failed: " .. tostring(err))
    end

    return true
end

-- End of capture callback: closes the ZMQ socket and terminates the context.
-- Both may still be nil if initialization failed before they were created,
-- so each is checked before use. Always returns true.
function on_capture_end()
    if socket ~= nil and not socket:closed() then
        socket:close()
    end
    if context ~= nil then
        context:term()
    end

    return true
end

The config file:

# Configuration read by the chisel. The chisel asserts that addr, schema,
# filter_suffix, field_sets, common_fields, syscalls and deployment_type are
# all present under this key.
containersec:
  # The chisel shown above only checks that this key is present
  deployment_type: kubernetes # kubernetes or container
  # ZMQ bind address for the chisel's PUB socket
  addr: tcp://*:3654
  # Path to protobuf message schema; the file is read and loaded at startup
  schema: /message.proto
  # Filter suffix, appended verbatim to the generated sysdig filter
  # (hence the leading space)
  filter_suffix: " and evt.dir = <"
  # Named field classes.
  # Every entry must have the form <category>.<field>: the chisel splits each
  # name on the first period, replaces the remaining periods with underscores
  # and strips square brackets (e.g. evt.arg[0] -> evt / arg0).
  field_sets:
    kubernetes_fields:
      - k8s.pod.name
      - k8s.ns.name
      - k8s.pod.labels
    container_fields:
      - container.id
      - container.name
    evt_fields:
      - evt.args
      - evt.arg[0]
      - evt.arg[1]
      - evt.arg[2]
      - evt.arg[3]
      - evt.category
      - evt.count
      - evt.cpu
      - evt.datetime
      - evt.deltatime
      - evt.deltatime.ns
      - evt.deltatime.s
      - evt.dir
      - evt.failed
      - evt.info
      - evt.is_io
      - evt.is_io_read
      - evt.is_io_write
      - evt.is_open_read
      - evt.is_open_write
      - evt.is_syslog
      - evt.is_wait
      - evt.latency
      - evt.latency.human
      - evt.latency.ns
      - evt.latency.s
      - evt.num
      - evt.outputtime
      - evt.rawarg.res
      - evt.rawtime
      - evt.rawtime.ns
      - evt.rawtime.s
      - evt.reltime
      - evt.reltime.ns
      - evt.reltime.s
      # NOTE(review): a later comment in this thread reports that evaluating
      # evt.res on bpf syscall events crashes sysdig; confirm whether
      # requesting it here is what triggers the segmentation fault
      - evt.res
      - evt.rawres
      - evt.time
      - evt.time.iso8601
      - evt.time.s
      - evt.type
    proc_fields:
      - proc.aname
      - proc.apid
      - proc.args
      - proc.cmdline
      - proc.cwd
      - proc.duration
      - proc.env
      - proc.exe
      - proc.exeline
      - proc.exepath
      - proc.fdlimit
      - proc.fdopencount
      - proc.fdusage
      - proc.is_container_healthcheck
      - proc.is_container_liveness_probe
      - proc.is_container_readiness_probe
      - proc.loginshellid
      - proc.name
      - proc.nchilds
      - proc.nthreads
      - proc.pcmdline
      - proc.pid
      - proc.pname
      - proc.ppid
      - proc.sid
      - proc.sname
      - proc.tty
      - proc.vmrss
      - proc.vmsize
      - proc.vmswap
      - proc.vpgid
      - proc.vpid
    thread_fields:
      - thread.cgroups
      - thread.exectime
      - thread.ismain
      - thread.pfmajor
      - thread.pfminor
      - thread.tid
      - thread.totexectime
      - thread.vmrss
      - thread.vmsize
      - thread.vtid
    syscall_fields:
      - syscall.type
    fd_fields:
      - fd.cip
      - fd.connected
      - fd.containerdirectory
      - fd.containername
      - fd.cport
      - fd.cproto
      - fd.dev
      - fd.dev.major
      - fd.dev.minor
      - fd.directory
      - fd.filename
      - fd.is_server
      - fd.l4proto
      - fd.lip
      - fd.lport
      - fd.lproto
      - fd.name
      - fd.name_changed
      - fd.num
      - fd.rip
      - fd.rport
      - fd.rproto
      - fd.sip
      - fd.sockfamily
      - fd.sport
      - fd.sproto
      - fd.type
      - fd.typechar
      - fd.uid
    user_fields:
      - user.uid
      - user.name
      - user.loginuid
    group_fields:
      - group.gid
      - group.name
  # List of field classes common to all system calls.
  # Each entry must name a key defined under field_sets.
  common_fields:
    - evt_fields
    - proc_fields
    - syscall_fields
    - thread_fields
    - user_fields
    - group_fields
  # List of all system calls to use (each listed once); the chisel joins them
  # into the "evt.type in (...)" part of the sysdig filter
  syscalls:
    - _sysctl
    - accept
    - accept4
    - arch_prctl
    - bind
    - bpf
    - capset
    - chmod
    - chown
    - chroot
    - clock_settime
    - clone
    - clone3
    - close
    - connect
    - creat
    - create_module
    - delete_module
    - dup
    - dup2
    - dup3
    - execve
    - execveat
    - exit
    - fchmod
    - fchmodat
    - fchown
    - fchownat
    - fork
    - fsconfig
    - fsmount
    - fsopen
    - futimesat
    - init_module
    - ioctl
    - kexec_file_load
    - kexec_load
    - kill
    - lchown
    - link
    - linkat
    - listen
    - mkdir
    - mkdirat
    - mknod
    - mknodat
    - mount
    - move_mount
    - nfsservctl
    - open
    - openat
    - pivot_root
    - prctl
    - process_vm_readv
    - process_vm_writev
    - ptrace
    - reboot
    - recvfrom
    - recvmsg
    - recvmmsg
    - rename
    - renameat
    - renameat2
    - rmdir
    - sendto
    - sendmsg
    - sendmmsg
    - setdomainname
    - setfsgid
    - setfsuid
    - setgid
    - setgroups
    - sethostname
    - setpgid
    - setregid
    - setresgid
    - setresuid
    - setreuid
    - setsid
    - settimeofday
    - setuid
    - shutdown
    - socket
    - symlink
    - symlinkat
    - syslog
    - umount
    - umount2
    - unlink
    - unlinkat
    - userfaultfd
    - utime
    - utimensat
    - utimes
    - vfork
    - vserver
nyrahul commented 3 years ago

The issue can be reproduced with two simple steps:

  1. execute: sudo sysdig syscall.type=bpf and evt.res!=nil
  2. execute any program that loads BPF code, e.g. one of the examples from libbpf/bootstrap.

The problem is with evt.res handling of bpf syscall in sysdig. If the evt.res clause is removed then there is no core dump.

Since the Cilium agent pods use the bpf syscall, the issue shows up on clusters where Cilium is installed.

github-actions[bot] commented 1 year ago

This issue has been automatically marked as stale because it has not had recent activity. It will be closed if no further activity occurs. Thank you for your contributions.