Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Issue with Sysdig when running in Kubernetes where Cilium is also installed. #1766

Open
saleem-accuknox opened this issue Jul 10, 2021 · 2 comments

Comments

@saleem-accuknox
Copy link

saleem-accuknox commented Jul 10, 2021

When I try to run a Sysdig chisel in Kubernetes (GKE) with Cilium also installed, I get a segmentation fault:

image (9)
image (8)

However, when I run it without Cilium, it works without any issues.
Command used to run the Sysdig chisel:
/usr/bin/sysdig -k http://127.0.0.1:8080 -c chisel config.yaml

The chisel:

-- Chisel metadata: long description, one-line summary and category
description = ""
short_description = "Monitor syscalls"
category = ""

-- Arguments accepted by the chisel: a single string holding the path of the
-- YAML configuration file (e.g. sysdig -c chisel /path/to/config.yaml)
args = {
	{
		name = "config_file",
		argtype = "string",
		description = "The path to the configuration yaml file",
	},
}

-- Imports
--local inspect = require('inspect')
--local json = require ("dkjson")
local pb = require "pb"
local protoc = require "protoc"
local yaml = require('yaml')
local zmq = require "lzmq"

-- State shared by the chisel callbacks:
--   all_field_names : list of every requested field name
--   field_handles   : field name -> handle returned by chisel.request_field
--   split_fields    : field name -> { category, sub_field }
--                     (e.g. field_name = "evt.name" -> { "evt", "name" })
local all_field_names, field_handles, split_fields = {}, {}, {}
-- List of common field names
--local common_field_names = {}

-- ZMQ context and socket (both assigned in on_init)
local context, socket

-- Argument notification callback.
-- Accepts only the "config_file" argument: its value is stored in the global
-- `config_file` and true is returned. Any other argument name is rejected by
-- returning false.
function on_set_arg(name, val)
	if name ~= "config_file" then
		return false
	end

	config_file = val
	return true
end

-- Capture start callback.
-- Does no work of its own and always returns true.
function on_capture_start()
	return true
end

-- Initialization callback.
--
-- Reads the YAML configuration named by `config_file` (stored by on_set_arg),
-- loads the protobuf schema it points to, creates and binds the ZMQ PUB
-- socket, requests a handle for every configured field and installs the
-- event filter.
--
-- Raises an error when:
--   * the config_file argument was never provided, or the configuration or
--     schema file cannot be opened,
--   * the configuration has no `containersec` section or lacks a required key,
--   * the protobuf schema fails to load,
--   * the ZMQ context/socket cannot be created or the socket cannot be bound,
--   * a configured field name contains no period ("<category>.<field>").
--
-- Returns true on success.
function on_init()
	-- read configuration file
	assert(config_file, "config_file argument was not provided")
	local fd = assert(io.open(config_file, "r"))
	local content = fd:read("*all")
	fd:close()

	-- parse configuration
	local config = yaml.eval(content)
	assert(type(config) == "table" and type(config.containersec) == "table",
		"configuration must contain a 'containersec' section")
	local settings = config.containersec

	-- make sure we have required fields in the config
	local required_keys = {
		"addr", "schema", "filter_suffix", "field_sets",
		"common_fields", "syscalls", "deployment_type",
	}
	for _, key in ipairs(required_keys) do
		assert(settings[key], "missing required configuration key: containersec." .. key)
	end

	-- read .proto schema file (assuming you are running from project root)
	print("Loading protobuf schema: " .. settings.schema)
	fd = assert(io.open(settings.schema, "r"))
	content = fd:read("*all")
	fd:close()

	-- load the schema
	assert(protoc:load(content))

	-- Connect to message queue. Failures are reported explicitly instead of
	-- surfacing later as an index-nil error; tostring() is used because the
	-- error value is not guaranteed to be a string.
	local ctx, ctx_err = zmq.context()
	context = assert(ctx, "failed to create ZMQ context: " .. tostring(ctx_err))
	local sock, sock_err = context:socket(zmq.PUB)
	socket = assert(sock, "failed to create ZMQ PUB socket: " .. tostring(sock_err))
	print("Binding to " .. settings.addr)
	local bound, bind_err = socket:bind(settings.addr)
	assert(bound, "failed to bind to " .. tostring(settings.addr) .. ": " .. tostring(bind_err))

	-- Get all field names
	for _, field_list in pairs(settings.field_sets) do
		for _, field_name in ipairs(field_list) do
			-- Split the field name on the first period
			-- k8s.pod.name -> k8s, pod.name
			local dot = string.find(field_name, ".", 1, true)
			if dot == nil then
				error("invalid field name '" .. tostring(field_name)
					.. "': expected <category>.<field>")
			end

			all_field_names[#all_field_names + 1] = field_name
			field_handles[field_name] = chisel.request_field(field_name)

			-- k8s
			local category = string.sub(field_name, 1, dot - 1)
			-- pod.name -> pod_name (remaining periods become underscores);
			-- the local keeps only the first gsub result (the string)
			local sub_field = string.gsub(string.sub(field_name, dot + 1), "%.", "_")
			-- [k8s.pod.name] = { k8s, pod_name }
			split_fields[field_name] = { category, sub_field }
		end
	end

	-- Set the filter
	local filter = "evt.type in (" .. table.concat(settings.syscalls, ",")
		.. ") and container.id != host" .. settings.filter_suffix
	chisel.set_filter(filter)

	return true
end

-- Build a nested table from the requested field values of one event, encode
-- it as a "proto.Message" protobuf and publish it on the ZMQ socket as a
-- two-frame message ("sysdig" followed by the encoded payload).
-- Fields whose value is nil are left out. Raises if encoding fails.
function handle_event(evt)
	local message = {}

	for _, name in ipairs(all_field_names) do
		-- e.g. name = "k8s.pod.name"
		local value = evt.field(field_handles[name])
		if value ~= nil then
			-- e.g. { "k8s", "pod_name" }
			local parts = split_fields[name]
			local group_name = parts[1]

			-- Create the per-category table on first use
			local group = message[group_name]
			if group == nil then
				group = {}
				message[group_name] = group
			end

			-- Drop square brackets from the key (e.g. arg[0] -> arg0)
			local key = string.gsub(parts[2], "[%[%]]", "")
			group[key] = value
		end
	end

	-- Encode first so an encoding failure is raised even with no open socket
	local payload = assert(pb.encode("proto.Message", message))
	if socket == nil or socket:closed() then
		return
	end

	socket:send("sysdig", zmq.SNDMORE)
	socket:send(payload)
end

-- Event parsing callback.
-- Runs handle_event on the current event (the global `evt`) under pcall so
-- that a failure while handling one event is printed instead of propagating.
-- Always returns true.
function on_event()
	local ok, err = pcall(handle_event, evt)

	if not ok then
		-- The error value may be a table or userdata rather than a string;
		-- concatenating it directly would raise a second error outside the
		-- pcall, so convert it explicitly.
		print("handle_event failed: " .. tostring(err))
	end

	return true
end

-- End of capture callback.
-- Closes the ZMQ socket and terminates the context if they exist, then
-- returns true. Both may still be nil when initialization failed before they
-- were created, so each is checked before use; the socket is checked with the
-- same nil/closed test that handle_event uses.
function on_capture_end()
	-- Close the socket before terminating the context
	if socket ~= nil and not socket:closed() then
		socket:close()
	end
	socket = nil

	if context ~= nil then
		context:term()
	end
	context = nil

	return true
end

The config file:

# Top-level section read by the chisel's on_init; every key below is required
# (on_init asserts that each one is present).
containersec:
  # Only checked for presence by the chisel shown above.
  deployment_type: kubernetes # kubernetes or container
  # ZMQ bind address (the chisel binds its PUB socket to this address)
  addr: tcp://*:3654
  # Path to protobuf message schema (read and loaded during on_init)
  schema: /message.proto
  # Filter suffix, appended verbatim to the generated
  # "evt.type in (...) and container.id != host" filter
  filter_suffix: " and evt.dir = <"
  # Named field classes.
  # The chisel requests a handle for every field listed in every class. Each
  # name is split on its first period into a category and a sub-field, and the
  # remaining periods become underscores (k8s.pod.name -> k8s / pod_name).
  # Every entry therefore needs at least one period.
  field_sets:
    kubernetes_fields:
      - k8s.pod.name
      - k8s.ns.name
      - k8s.pod.labels
    container_fields:
      - container.id
      - container.name
    evt_fields:
      - evt.args
      # Square brackets are stripped from the key when the event is built
      # (evt.arg[0] -> arg0)
      - evt.arg[0]
      - evt.arg[1]
      - evt.arg[2]
      - evt.arg[3]
      - evt.category
      - evt.count
      - evt.cpu
      - evt.datetime
      - evt.deltatime
      - evt.deltatime.ns
      - evt.deltatime.s
      - evt.dir
      - evt.failed
      - evt.info
      - evt.is_io
      - evt.is_io_read
      - evt.is_io_write
      - evt.is_open_read
      - evt.is_open_write
      - evt.is_syslog
      - evt.is_wait
      - evt.latency
      - evt.latency.human
      - evt.latency.ns
      - evt.latency.s
      - evt.num
      - evt.outputtime
      - evt.rawarg.res
      - evt.rawtime
      - evt.rawtime.ns
      - evt.rawtime.s
      - evt.reltime
      - evt.reltime.ns
      - evt.reltime.s
      - evt.res
      - evt.rawres
      - evt.time
      - evt.time.iso8601
      - evt.time.s
      - evt.type
    proc_fields:
      - proc.aname
      - proc.apid
      - proc.args
      - proc.cmdline
      - proc.cwd
      - proc.duration
      - proc.env
      - proc.exe
      - proc.exeline
      - proc.exepath
      - proc.fdlimit
      - proc.fdopencount
      - proc.fdusage
      - proc.is_container_healthcheck
      - proc.is_container_liveness_probe
      - proc.is_container_readiness_probe
      - proc.loginshellid
      - proc.name
      - proc.nchilds
      - proc.nthreads
      - proc.pcmdline
      - proc.pid
      - proc.pname
      - proc.ppid
      - proc.sid
      - proc.sname
      - proc.tty
      - proc.vmrss
      - proc.vmsize
      - proc.vmswap
      - proc.vpgid
      - proc.vpid
    thread_fields:
      - thread.cgroups
      - thread.exectime
      - thread.ismain
      - thread.pfmajor
      - thread.pfminor
      - thread.tid
      - thread.totexectime
      - thread.vmrss
      - thread.vmsize
      - thread.vtid
    syscall_fields:
      - syscall.type
    fd_fields:
      - fd.cip
      - fd.connected
      - fd.containerdirectory
      - fd.containername
      - fd.cport
      - fd.cproto
      - fd.dev
      - fd.dev.major
      - fd.dev.minor
      - fd.directory
      - fd.filename
      - fd.is_server
      - fd.l4proto
      - fd.lip
      - fd.lport
      - fd.lproto
      - fd.name
      - fd.name_changed
      - fd.num
      - fd.rip
      - fd.rport
      - fd.rproto
      - fd.sip
      - fd.sockfamily
      - fd.sport
      - fd.sproto
      - fd.type
      - fd.typechar
      - fd.uid
    user_fields:
      - user.uid
      - user.name
      - user.loginuid
    group_fields:
      - group.gid
      - group.name
  # List of field classes common to all system calls.
  # The chisel shown above only checks that this key exists; it does not read
  # the entries.
  # NOTE(review): "event_fields" does not match any class under field_sets
  # (the class there is named "evt_fields") - confirm which name the consumer
  # of this list expects.
  common_fields:
    - event_fields
    - proc_fields
    - syscall_fields
    - thread_fields
    - user_fields
    - group_fields
  # List of all system calls to use.
  # The chisel joins these names with commas into an "evt.type in (...)"
  # filter, so each name only needs to appear once.
  syscalls:
    - _sysctl
    - accept
    - accept4
    - arch_prctl
    - bind
    - bpf
    - capset
    - chmod
    - chown
    - chroot
    - clock_settime
    - clone
    - clone3
    - close
    - connect
    - creat
    - create_module
    - delete_module
    - dup
    - dup2
    - dup3
    - execve
    - execveat
    - exit
    - fchmod
    - fchmodat
    - fchown
    - fchownat
    - fork
    - fsconfig
    - fsmount
    - fsopen
    - futimesat
    - init_module
    - ioctl
    - kexec_file_load
    - kexec_load
    - kill
    - lchown
    - link
    - linkat
    - listen
    - mkdir
    - mkdirat
    - mknod
    - mknodat
    - mount
    - move_mount
    - nfsservctl
    - open
    - openat
    - pivot_root
    - prctl
    - process_vm_readv
    - process_vm_writev
    - ptrace
    - reboot
    - recvfrom
    - recvmsg
    - recvmmsg
    - rename
    - renameat
    - renameat2
    - rmdir
    - sendto
    - sendmsg
    - sendmmsg
    - setdomainname
    - setfsgid
    - setfsuid
    - setgid
    - setgroups
    - sethostname
    - setpgid
    - setregid
    - setresgid
    - setresuid
    - setreuid
    - setsid
    - settimeofday
    - setuid
    - shutdown
    - socket
    - symlink
    - symlinkat
    - syslog
    - umount
    - umount2
    - unlink
    - unlinkat
    - userfaultfd
    - utime
    - utimensat
    - utimes
    - vfork
    - vserver
@nyrahul
Copy link

nyrahul commented Aug 6, 2021

The issue can be reproduced with two simple steps:

  1. execute: sudo sysdig syscall.type=bpf and evt.res!=nil
  2. execute any program that loads BPF code (for example, the examples from libbpf/bootstrap).

The problem is with the evt.res handling of the bpf syscall in sysdig. If the evt.res clause is removed, there is no core dump.

Since the Cilium agent pods use the bpf syscall, the issue is seen in that context.

@github-actions
Copy link

This issue has been automatically marked as stale because it has not had recent activity. It will be closed if no further activity occurs. Thank you for your contributions.

@github-actions github-actions bot added the stale label Feb 22, 2023
@github-actions github-actions bot closed this as not planned Won't fix, can't repro, duplicate, stale Mar 2, 2023
@therealbobo therealbobo reopened this Mar 9, 2023
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Projects
None yet
Development

No branches or pull requests

3 participants