falcosecurity / libs

libsinsp, libscap, the kernel module driver, and the eBPF driver sources
https://falcosecurity.github.io/libs/
Apache License 2.0
229 stars 164 forks source link

[BUG] Verifier failure on `cos-beta-117-18613-0-66` #2118

Open Andreagit97 opened 9 hours ago

Andreagit97 commented 9 hours ago

Describe the bug

Running the modern BPF probe on COS (`cos-beta-117-18613-0-66`):

We face the following verifier error

libbpf: prog 'capset_x': BPF program load failed: Permission denied
libbpf: prog 'capset_x': -- BEGIN PROG LOAD LOG --
reg type unsupported for arg#0 function capset_x#982
0: R1=ctx(off=0,imm=0) R10=fp0
; int BPF_PROG(capset_x,
0: (bf) r7 = r1                       ; R1=ctx(off=0,imm=0) R7_w=ctx(off=0,imm=0)
; int BPF_PROG(capset_x,
1: (79) r9 = *(u64 *)(r7 +8)          ; R7_w=ctx(off=0,imm=0) R9_w=scalar()
; uint32_t cpu_id = (uint32_t)bpf_get_smp_processor_id();
2: (85) call bpf_get_smp_processor_id#8       ; R0_w=scalar(umax=1,var_off=(0x0; 0x1))
; uint32_t cpu_id = (uint32_t)bpf_get_smp_processor_id();
3: (63) *(u32 *)(r10 -8) = r0         ; R0_w=scalar(umax=1,var_off=(0x0; 0x1)) R10=fp0 fp-8=
4: (bf) r2 = r10                      ; R2_w=fp0 R10=fp0
;
5: (07) r2 += -8                      ; R2_w=fp-8
; return (struct ringbuf_map *)bpf_map_lookup_elem(&ringbuf_maps, &cpu_id);
6: (18) r1 = 0xffff88810ab0fe00       ; R1_w=map_ptr(off=0,ks=4,vs=4,imm=0)
8: (85) call bpf_map_lookup_elem#1    ; R0=map_value_or_null(id=1,off=0,ks=4,vs=4,imm=0)
9: (bf) r6 = r0                       ; R0=map_value_or_null(id=1,off=0,ks=4,vs=4,imm=0) R6_w=map_value_or_null(id=1,off=0,ks=4,vs=4,imm=0)
; if(!rb)
10: (55) if r6 != 0x0 goto pc+6 17: R0=map_ptr(off=0,ks=0,vs=0,imm=0) R6=map_ptr(off=0,ks=0,vs=0,imm=0) R7=ctx(off=0,imm=0) R9=scalar() R10=fp0 fp-8=????mmmm
; uint32_t cpu_id = (uint32_t)bpf_get_smp_processor_id();
17: (85) call bpf_get_smp_processor_id#8      ; R0_w=scalar(umax=1,var_off=(0x0; 0x1))
; uint32_t cpu_id = (uint32_t)bpf_get_smp_processor_id();
18: (63) *(u32 *)(r10 -8) = r0        ; R0_w=scalar(umax=1,var_off=(0x0; 0x1)) R10=fp0 fp-8=
19: (bf) r2 = r10                     ; R2_w=fp0 R10=fp0
;
20: (07) r2 += -8                     ; R2_w=fp-8
; return (struct counter_map *)bpf_map_lookup_elem(&counter_maps, &cpu_id);
21: (18) r1 = 0xffff88812a85cc00      ; R1_w=map_ptr(off=0,ks=4,vs=136,imm=0)
23: (85) call bpf_map_lookup_elem#1   ; R0_w=map_value_or_null(id=2,off=0,ks=4,vs=136,imm=0)
24: (bf) r7 = r0                      ; R0_w=map_value_or_null(id=2,off=0,ks=4,vs=136,imm=0) R7_w=map_value_or_null(id=2,off=0,ks=4,vs=136,imm=0)
; if(!counter)
25: (15) if r7 == 0x0 goto pc+330     ; R7_w=map_value(off=0,ks=4,vs=136,imm=0)
; counter->n_evts++;
26: (79) r1 = *(u64 *)(r7 +0)         ; R1_w=scalar() R7_w=map_value(off=0,ks=4,vs=136,imm=0)
27: (07) r1 += 1                      ; R1_w=scalar()
28: (7b) *(u64 *)(r7 +0) = r1         ; R1_w=scalar() R7_w=map_value(off=0,ks=4,vs=136,imm=0)
; uint8_t *space = bpf_ringbuf_reserve(rb, event_size, 0);
29: (bf) r1 = r6                      ; R1_w=map_ptr(off=0,ks=0,vs=0,imm=0) R6=map_ptr(off=0,ks=0,vs=0,imm=0)
30: (b7) r2 = 66                      ; R2_w=66
31: (b7) r3 = 0                       ; R3_w=0
32: (85) call bpf_ringbuf_reserve#131         ; R0=ringbuf_mem_or_null(id=4,ref_obj_id=4,off=0,imm=0) refs=4
33: (bf) r6 = r0                      ; R0=ringbuf_mem_or_null(id=4,ref_obj_id=4,off=0,imm=0) R6_w=ringbuf_mem_or_null(id=4,ref_obj_id=4,off=0,imm=0) refs=4
; if(!space)
34: (55) if r6 != 0x0 goto pc+7 42: R0=ringbuf_mem(ref_obj_id=4,off=0,imm=0) R6_w=ringbuf_mem(ref_obj_id=4,off=0,imm=0) R7=map_value(off=0,ks=4,vs=136,imm=0) R9=scalar() R10=fp0 fp-8=????mmmm refs=4
; return g_event_params_table[event_id];
42: (18) r1 = 0xffffc900044cc010      ; R1_w=map_value(off=16,ks=4,vs=245818,imm=0) refs=4
44: (71) r2 = *(u8 *)(r1 +353)        ; R1_w=map_value(off=16,ks=4,vs=245818,imm=0) R2_w=4 refs=4
; ringbuf->payload_pos = sizeof(struct ppm_evt_hdr) + nparams * sizeof(uint16_t);
45: (bf) r7 = r2                      ; R2_w=4 R7_w=4 refs=4
46: (67) r7 <<= 1                     ; R7_w=8 refs=4
47: (7b) *(u64 *)(r10 -24) = r7       ; R7_w=8 R10=fp0 fp-24_w=8 refs=4
; ringbuf->payload_pos = sizeof(struct ppm_evt_hdr) + nparams * sizeof(uint16_t);
48: (07) r7 += 26                     ; R7_w=34 refs=4
49: (b7) r1 = 20                      ; R1_w=20 refs=4
50: (7b) *(u64 *)(r10 -32) = r2       ; R2_w=4 R10=fp0 fp-32_w=4 refs=4
; PUSH_FIXED_SIZE_TO_RINGBUF(ringbuf, param, sizeof(int64_t));
51: (2d) if r1 > r2 goto pc+1         ; R1_w=20 R2_w=4 refs=4
; return g_settings.boot_time;
53: (18) r1 = 0xffffc9000450bdb0      ; R1_w=map_value(off=3504,ks=4,vs=591841,imm=0) refs=4
55: (79) r8 = *(u64 *)(r1 +0)         ; R1_w=map_value(off=3504,ks=4,vs=591841,imm=0) R8_w=scalar() refs=4
; hdr->ts = maps__get_boot_time() + bpf_ktime_get_boot_ns();
56: (85) call bpf_ktime_get_boot_ns#125       ; R0_w=scalar() refs=4
; hdr->ts = maps__get_boot_time() + bpf_ktime_get_boot_ns();
57: (0f) r0 += r8                     ; R0_w=scalar() R8_w=scalar() refs=4
; hdr->ts = maps__get_boot_time() + bpf_ktime_get_boot_ns();
58: (bf) r1 = r0                      ; R0_w=scalar(id=5) R1_w=scalar(id=5) refs=4
59: (77) r1 >>= 56                    ; R1_w=scalar(umax=255,var_off=(0x0; 0xff)) refs=4
60: (73) *(u8 *)(r6 +7) = r1          ; R1_w=scalar(umax=255,var_off=(0x0; 0xff)) R6=ringbuf_mem(ref_obj_id=4,off=0,imm=0) refs=4
61: (bf) r1 = r0                      ; R0_w=scalar(id=5) R1_w=scalar(id=5) refs=4
62: (77) r1 >>= 48                    ; R1_w=scalar(umax=65535,var_off=(0x0; 0xffff)) refs=4
63: (73) *(u8 *)(r6 +6) = r1          ; R1_w=scalar(umax=65535,var_off=(0x0; 0xffff)) R6=ringbuf_mem(ref_obj_id=4,off=0,imm=0) refs=4
64: (bf) r1 = r0                      ; R0_w=scalar(id=5) R1_w=scalar(id=5) refs=4
65: (77) r1 >>= 40                    ; R1_w=scalar(umax=16777215,var_off=(0x0; 0xffffff)) refs=4
66: (73) *(u8 *)(r6 +5) = r1          ; R1_w=scalar(umax=16777215,var_off=(0x0; 0xffffff)) R6=ringbuf_mem(ref_obj_id=4,off=0,imm=0) refs=4
67: (bf) r1 = r0                      ; R0_w=scalar(id=5) R1_w=scalar(id=5) refs=4
68: (77) r1 >>= 32                    ; R1_w=scalar(umax=4294967295,var_off=(0x0; 0xffffffff)) refs=4
69: (73) *(u8 *)(r6 +4) = r1          ; R1_w=scalar(umax=4294967295,var_off=(0x0; 0xffffffff)) R6=ringbuf_mem(ref_obj_id=4,off=0,imm=0) refs=4
70: (bf) r1 = r0                      ; R0_w=scalar(id=5) R1_w=scalar(id=5) refs=4
71: (77) r1 >>= 24                    ; R1_w=scalar(umax=1099511627775,var_off=(0x0; 0xffffffffff)) refs=4
72: (73) *(u8 *)(r6 +3) = r1          ; R1_w=scalar(umax=1099511627775,var_off=(0x0; 0xffffffffff)) R6=ringbuf_mem(ref_obj_id=4,off=0,imm=0) refs=4
73: (bf) r1 = r0                      ; R0_w=scalar(id=5) R1_w=scalar(id=5) refs=4
74: (77) r1 >>= 16                    ; R1_w=scalar(umax=281474976710655,var_off=(0x0; 0xffffffffffff)) refs=4
75: (73) *(u8 *)(r6 +2) = r1          ; R1_w=scalar(umax=281474976710655,var_off=(0x0; 0xffffffffffff)) R6=ringbuf_mem(ref_obj_id=4,off=0,imm=0) refs=4
76: (73) *(u8 *)(r6 +0) = r0          ; R0_w=scalar(id=5) R6=ringbuf_mem(ref_obj_id=4,off=0,imm=0) refs=4
77: (77) r0 >>= 8                     ; R0_w=scalar(umax=72057594037927935,var_off=(0x0; 0xffffffffffffff)) refs=4
78: (73) *(u8 *)(r6 +1) = r0          ; R0_w=scalar(umax=72057594037927935,var_off=(0x0; 0xffffffffffffff)) R6=ringbuf_mem(ref_obj_id=4,off=0,imm=0) refs=4
; hdr->tid = bpf_get_current_pid_tgid() & 0xffffffff;
79: (85) call bpf_get_current_pid_tgid#14     ; R0=scalar() refs=4
80: (b7) r1 = 1                       ; R1_w=1 refs=4
; hdr->type = ringbuf->event_type;
81: (73) *(u8 *)(r6 +21) = r1         ; R1_w=1 R6=ringbuf_mem(ref_obj_id=4,off=0,imm=0) refs=4
82: (b7) r1 = 97                      ; R1_w=97 refs=4
83: (73) *(u8 *)(r6 +20) = r1         ; R1_w=97 R6=ringbuf_mem(ref_obj_id=4,off=0,imm=0) refs=4
84: (b7) r1 = 0                       ; R1_w=0 refs=4
; hdr->nparams = nparams;
85: (73) *(u8 *)(r6 +25) = r1         ; R1_w=0 R6=ringbuf_mem(ref_obj_id=4,off=0,imm=0) refs=4
86: (73) *(u8 *)(r6 +24) = r1         ; R1_w=0 R6=ringbuf_mem(ref_obj_id=4,off=0,imm=0) refs=4
87: (73) *(u8 *)(r6 +23) = r1         ; R1_w=0 R6=ringbuf_mem(ref_obj_id=4,off=0,imm=0) refs=4
; hdr->tid = bpf_get_current_pid_tgid() & 0xffffffff;
88: (73) *(u8 *)(r6 +15) = r1         ; R1_w=0 R6=ringbuf_mem(ref_obj_id=4,off=0,imm=0) refs=4
89: (73) *(u8 *)(r6 +14) = r1         ; R1_w=0 R6=ringbuf_mem(ref_obj_id=4,off=0,imm=0) refs=4
90: (73) *(u8 *)(r6 +13) = r1         ; R1_w=0 R6=ringbuf_mem(ref_obj_id=4,off=0,imm=0) refs=4
91: (73) *(u8 *)(r6 +12) = r1         ; R1_w=0 R6=ringbuf_mem(ref_obj_id=4,off=0,imm=0) refs=4
; hdr->len = ringbuf->reserved_event_size;
92: (73) *(u8 *)(r6 +19) = r1         ; R1_w=0 R6=ringbuf_mem(ref_obj_id=4,off=0,imm=0) refs=4
93: (73) *(u8 *)(r6 +18) = r1         ; R1_w=0 R6=ringbuf_mem(ref_obj_id=4,off=0,imm=0) refs=4
94: (73) *(u8 *)(r6 +17) = r1         ; R1_w=0 R6=ringbuf_mem(ref_obj_id=4,off=0,imm=0) refs=4
95: (b7) r1 = 66                      ; R1_w=66 refs=4
96: (73) *(u8 *)(r6 +16) = r1         ; R1_w=66 R6=ringbuf_mem(ref_obj_id=4,off=0,imm=0) refs=4
; hdr->tid = bpf_get_current_pid_tgid() & 0xffffffff;
97: (bf) r1 = r0                      ; R0=scalar(id=6) R1_w=scalar(id=6) refs=4
98: (77) r1 >>= 24                    ; R1_w=scalar(umax=1099511627775,var_off=(0x0; 0xffffffffff)) refs=4
99: (73) *(u8 *)(r6 +11) = r1         ; R1_w=scalar(umax=1099511627775,var_off=(0x0; 0xffffffffff)) R6=ringbuf_mem(ref_obj_id=4,off=0,imm=0) refs=4
100: (bf) r1 = r0                     ; R0=scalar(id=6) R1_w=scalar(id=6) refs=4
101: (77) r1 >>= 16                   ; R1_w=scalar(umax=281474976710655,var_off=(0x0; 0xffffffffffff)) refs=4
102: (73) *(u8 *)(r6 +10) = r1        ; R1_w=scalar(umax=281474976710655,var_off=(0x0; 0xffffffffffff)) R6=ringbuf_mem(ref_obj_id=4,off=0,imm=0) refs=4
103: (73) *(u8 *)(r6 +8) = r0         ; R0=scalar(id=6) R6=ringbuf_mem(ref_obj_id=4,off=0,imm=0) refs=4
104: (77) r0 >>= 8                    ; R0_w=scalar(umax=72057594037927935,var_off=(0x0; 0xffffffffffffff)) refs=4
105: (73) *(u8 *)(r6 +9) = r0         ; R0_w=scalar(umax=72057594037927935,var_off=(0x0; 0xffffffffffffff)) R6=ringbuf_mem(ref_obj_id=4,off=0,imm=0) refs=4
106: (79) r1 = *(u64 *)(r10 -32)      ; R1_w=4 R10=fp0 fp-32=4 refs=4
107: (bf) r8 = r1                     ; R1_w=4 R8_w=4 refs=4
; hdr->nparams = nparams;
108: (73) *(u8 *)(r6 +22) = r1        ; R1_w=4 R6=ringbuf_mem(ref_obj_id=4,off=0,imm=0) refs=4
; PUSH_FIXED_SIZE_TO_RINGBUF(ringbuf, param, sizeof(int64_t));
109: (bf) r1 = r6                     ; R1_w=ringbuf_mem(ref_obj_id=4,off=0,imm=0) R6=ringbuf_mem(ref_obj_id=4,off=0,imm=0) refs=4
110: (0f) r1 += r7                    ; R1_w=ringbuf_mem(ref_obj_id=4,off=34,imm=0) R7=34 refs=4
111: (bf) r2 = r9                     ; R2_w=scalar(id=7) R9=scalar(id=7) refs=4
112: (77) r2 >>= 48                   ; R2_w=scalar(umax=65535,var_off=(0x0; 0xffff)) refs=4
113: (73) *(u8 *)(r1 +6) = r2         ; R1_w=ringbuf_mem(ref_obj_id=4,off=34,imm=0) R2_w=scalar(umax=65535,var_off=(0x0; 0xffff)) refs=4
114: (bf) r2 = r9                     ; R2_w=scalar(id=7) R9=scalar(id=7) refs=4
115: (77) r2 >>= 56                   ; R2_w=scalar(umax=255,var_off=(0x0; 0xff)) refs=4
116: (73) *(u8 *)(r1 +7) = r2         ; R1_w=ringbuf_mem(ref_obj_id=4,off=34,imm=0) R2_w=scalar(umax=255,var_off=(0x0; 0xff)) refs=4
117: (bf) r2 = r9                     ; R2_w=scalar(id=7) R9=scalar(id=7) refs=4
118: (77) r2 >>= 32                   ; R2_w=scalar(umax=4294967295,var_off=(0x0; 0xffffffff)) refs=4
119: (73) *(u8 *)(r1 +4) = r2         ; R1_w=ringbuf_mem(ref_obj_id=4,off=34,imm=0) R2_w=scalar(umax=4294967295,var_off=(0x0; 0xffffffff)) refs=4
120: (bf) r2 = r9                     ; R2_w=scalar(id=7) R9=scalar(id=7) refs=4
121: (77) r2 >>= 40                   ; R2_w=scalar(umax=16777215,var_off=(0x0; 0xffffff)) refs=4
122: (73) *(u8 *)(r1 +5) = r2         ; R1_w=ringbuf_mem(ref_obj_id=4,off=34,imm=0) R2_w=scalar(umax=16777215,var_off=(0x0; 0xffffff)) refs=4
123: (bf) r2 = r9                     ; R2_w=scalar(id=7) R9=scalar(id=7) refs=4
124: (77) r2 >>= 16                   ; R2_w=scalar(umax=281474976710655,var_off=(0x0; 0xffffffffffff)) refs=4
125: (73) *(u8 *)(r1 +2) = r2         ; R1_w=ringbuf_mem(ref_obj_id=4,off=34,imm=0) R2_w=scalar(umax=281474976710655,var_off=(0x0; 0xffffffffffff)) refs=4
126: (bf) r2 = r9                     ; R2_w=scalar(id=7) R9=scalar(id=7) refs=4
127: (77) r2 >>= 24                   ; R2_w=scalar(umax=1099511627775,var_off=(0x0; 0xffffffffff)) refs=4
128: (73) *(u8 *)(r1 +3) = r2         ; R1_w=ringbuf_mem(ref_obj_id=4,off=34,imm=0) R2_w=scalar(umax=1099511627775,var_off=(0x0; 0xffffffffff)) refs=4
129: (73) *(u8 *)(r1 +0) = r9         ; R1_w=ringbuf_mem(ref_obj_id=4,off=34,imm=0) R9=scalar(id=7) refs=4
130: (77) r9 >>= 8                    ; R9_w=scalar(umax=72057594037927935,var_off=(0x0; 0xffffffffffffff)) refs=4
131: (73) *(u8 *)(r1 +1) = r9         ; R1_w=ringbuf_mem(ref_obj_id=4,off=34,imm=0) R9_w=scalar(umax=72057594037927935,var_off=(0x0; 0xffffffffffffff)) refs=4
132: (b7) r1 = 8                      ; R1_w=8 refs=4
133: (6b) *(u16 *)(r6 +26) = r1       ; R1_w=8 R6=ringbuf_mem(ref_obj_id=4,off=0,imm=0) refs=4
134: (18) r1 = 0x1                    ; R1_w=1 refs=4
; && (bpf_core_enum_value(enum bpf_func_id, BPF_FUNC_get_current_task_btf) == BPF_FUNC_get_current_task_btf))
136: (15) if r1 == 0x0 goto pc+5      ; R1_w=1 refs=4
137: (18) r1 = 0x9e                   ; R1_w=158 refs=4
; if(bpf_core_enum_value_exists(enum bpf_func_id, BPF_FUNC_get_current_task_btf)
139: (55) if r1 != 0x9e goto pc+2     ; R1_w=158 refs=4
; return (struct task_struct *)bpf_get_current_task_btf();
140: (85) call bpf_get_current_task_btf#158   ; R0=trusted_ptr_task_struct(off=0,imm=0) refs=4
141: (05) goto pc+1
;
143: (bf) r7 = r0                     ; R0=trusted_ptr_task_struct(off=0,imm=0) R7_w=trusted_ptr_task_struct(off=0,imm=0) refs=4
144: (18) r1 = 0x1                    ; R1_w=1 refs=4
146: (79) r9 = *(u64 *)(r10 -24)      ; R9_w=8 R10=fp0 fp-24=8 refs=4
; READ_TASK_FIELD_INTO(&cap_struct, task, cred, cap_inheritable);
147: (15) if r1 == 0x0 goto pc+7      ; R1_w=1 refs=4
148: (18) r1 = 0x9e                   ; R1_w=158 refs=4
; READ_TASK_FIELD_INTO(&cap_struct, task, cred, cap_inheritable);
150: (55) if r1 != 0x9e goto pc+4     ; R1_w=158 refs=4
; READ_TASK_FIELD_INTO(&cap_struct, task, cred, cap_inheritable);
151: (79) r1 = *(u64 *)(r7 +1984)     ; R1_w=rcu_ptr_or_null_cred(id=8,off=0,imm=0) R7_w=trusted_ptr_task_struct(off=0,imm=0) refs=4
152: (79) r1 = *(u64 *)(r1 +48)
R1 invalid mem access 'rcu_ptr_or_null_'
processed 146 insns (limit 1000000) max_states_per_insn 0 total_states 7 peak_states 7 mark_read 5
-- END PROG LOAD LOG --

More in detail

; READ_TASK_FIELD_INTO(&cap_struct, task, cred, cap_inheritable);
151: (79) r1 = *(u64 *)(r7 +1984)     ; R1_w=rcu_ptr_or_null_cred(id=8,off=0,imm=0) R7_w=trusted_ptr_task_struct(off=0,imm=0) refs=4
152: (79) r1 = *(u64 *)(r1 +48)
R1 invalid mem access 'rcu_ptr_or_null_'
processed 146 insns (limit 1000000) max_states_per_insn 0 total_states 7 peak_states 7 mark_read 5
-- END PROG LOAD LOG --

There is a problem when we try to access the task->cred field. Looking at the same program (capset_x) loaded on another kernel (6.8.0-45-generic #45~22.04.1-Ubuntu) we obtain the following register state

; READ_TASK_FIELD_INTO(&cap_struct, task, cred, cap_permitted);
238: (79) r1 = *(u64 *)(r7 +2992)     ; R1_w=ptr_cred() R7=trusted_ptr_task_struct() refs=4
239: (79) r1 = *(u64 *)(r1 +56)       ; R1_w=scalar() refs=4
240: (7b) *(u64 *)(r10 -8) = r1       ; R1_w=scalar() R10=fp0 fp-8_w=mmmmmmmm refs=4

As you can see, the same field `cred` is seen as a plain pointer (`ptr_cred`) on the Ubuntu kernel, while on COS it is seen as `rcu_ptr_or_null_cred`, so we hit the following verifier branch:

    } else if (base_type(reg->type) == PTR_TO_MEM) {
        bool rdonly_mem = type_is_rdonly_mem(reg->type);

        if (type_may_be_null(reg->type)) {
            verbose(env, "R%d invalid mem access '%s'\n", regno,
                reg_type_str(env, reg->type));
            return -EACCES;
        }

The reason why COS changes this type probably lies in how the `cred` field is tagged in the kernel BTF:

            t = btf_type_by_id(btf, mtype->type);
            if (btf_type_is_type_tag(t)) {
                tag_value = __btf_name_by_offset(btf, t->name_off);
                /* check __user tag */
                if (strcmp(tag_value, "user") == 0)
                    tmp_flag = MEM_USER;
                /* check __percpu tag */
                if (strcmp(tag_value, "percpu") == 0)
                    tmp_flag = MEM_PERCPU;
                /* check __rcu tag */
                if (strcmp(tag_value, "rcu") == 0)
                    tmp_flag = MEM_RCU;
            }

For some reason, on COS we enter the `rcu` branch, and once the `MEM_RCU` flag is set we also acquire the `PTR_MAYBE_NULL` flag (as seen in the register state above). This happens in the following code:

        if (type_is_trusted(env, reg, field_name, btf_id)) {
            flag |= PTR_TRUSTED;
        } else if (type_is_trusted_or_null(env, reg, field_name, btf_id)) {
            flag |= PTR_TRUSTED | PTR_MAYBE_NULL;
        } else if (in_rcu_cs(env) && !type_may_be_null(reg->type)) {
            if (type_is_rcu(env, reg, field_name, btf_id)) {
                /* ignore __rcu tag and mark it MEM_RCU */
                flag |= MEM_RCU;
            } else if (flag & MEM_RCU ||
                   type_is_rcu_or_null(env, reg, field_name, btf_id)) {
                /* __rcu tagged pointers can be NULL */
                flag |= MEM_RCU | PTR_MAYBE_NULL; // <--- here we acquire the `PTR_MAYBE_NULL`

                /* We always trust them */
                if (type_is_rcu_or_null(env, reg, field_name, btf_id) &&
                    flag & PTR_UNTRUSTED)
                    flag &= ~PTR_UNTRUSTED;
            } else if (flag & (MEM_PERCPU | MEM_USER)) {
                /* keep as-is */
            } else {
                /* walking unknown pointers yields old deprecated PTR_TO_BTF_ID */
                clear_trusted_flags(&flag);
            }
        }

I will propose a possible fix for this in the short term, but we should look into why this is happening on the COS kernel and whether it is a bug or intended behavior.

Andreagit97 commented 9 hours ago

@Molter73 this is the same issue we talked about some time ago

Molter73 commented 8 hours ago

We had a fix for this that I thought we had already upstreamed; sorry, this fell through the cracks. It's pretty close to your PR though: stackrox/falcosecurity-libs#82

If I recall correctly, @erthalion looked into it and COS is compiling the kernel with clang, which has some additional safety annotations that are ignored by GCC and cause this verifier issue, which also matches your analysis.

Andreagit97 commented 7 hours ago

Oh, that explains why `__rcu` markers are considered on COS and not on other kernels, thank you for the info! As for the proposed fixes, they are almost identical. I avoided the extra null check since `BPF_CORE_READ_INTO` should handle it for us: if `unsafe_ptr` is `0 + some offset`, `copy_from_kernel_nofault` will fail because that is not a kernel address, so the output will be memset to 0. The same thing then happens at every following step until the iterations of `BPF_CORE_READ_INTO` end.

bpf_probe_read_kernel_common(void *dst, u32 size, const void *unsafe_ptr)
{
    int ret = -EFAULT;

    if (IS_ENABLED(CONFIG_BPF_EVENTS))
        ret = copy_from_kernel_nofault(dst, unsafe_ptr, size);
    if (unlikely(ret < 0))
        memset(dst, 0, size);
    return ret;
}