Open Anjali05 opened 1 month ago
Currently BPF does not have helpers/kfuncs to identify the context. So I don't have a good way either. Is it possible that you filter some stack trace if their pattern suggests it is from non-process context?
@yonghong-song There are patterns but if I filter lock acquisitions like this, I will be losing data on some locks. For example in stacks like these, I do not have separate stack traces giving me the name of the lock acquired by folio_lruvec_lock_irqsave
:
"locks": [
"&head->lock"
],
"kernel_stack": [
"_raw_spin_lock+0x5 [kernel]",
"__pcpu_freelist_pop+0xb3 [kernel]",
"pcpu_freelist_pop+0x1a [kernel]",
"__bpf_get_stackid+0x147 [kernel]",
"bpf_get_stackid_raw_tp+0x56 [kernel]",
"bpf_prog_6deef7357e7b4530+0x91a6 [bpf]",
"ipmi_si_pci_init.cold+0x11741 [ipmi_si]",
"_raw_spin_lock_nested+0x5 [kernel]",
"raw_spin_rq_lock_nested+0x23 [kernel]",
"try_to_wake_up+0x249 [kernel]",
"hrtimer_wakeup+0x1e [kernel]",
"__hrtimer_run_queues+0x137 [kernel]",
"hrtimer_interrupt+0x118 [kernel]",
"__sysvec_apic_timer_interrupt+0x97 [kernel]",
"sysvec_apic_timer_interrupt+0x9d [kernel]",
"asm_sysvec_apic_timer_interrupt+0x16 [kernel]",
"unwind_next_frame+0x34 [kernel]",
"__unwind_start+0x15b [kernel]",
"perf_callchain_kernel+0x9d [kernel]",
"get_perf_callchain+0xc5 [kernel]",
"bpf_get_stackid+0x69 [kernel]",
"bpf_get_stackid_raw_tp+0x56 [kernel]",
"bpf_prog_6deef7357e7b4530+0x8b7e [bpf]",
"ipmi_si_pci_init.cold+0xd73e [ipmi_si]",
"_raw_spin_lock_irqsave+0x5 [kernel]",
"folio_lruvec_lock_irqsave+0x5e [kernel]",
"folio_batch_move_lru+0x58 [kernel]",
"folio_add_lru+0x9c [kernel]",
"shmem_get_folio_gfp+0x3b4 [kernel]",
"shmem_fault+0x76 [kernel]",
"__do_fault+0x31 [kernel]",
"do_fault+0x21c [kernel]",
"__handle_mm_fault+0x628 [kernel]",
"handle_mm_fault+0x107 [kernel]",
"__get_user_pages+0x231 [kernel]",
"get_user_pages_unlocked+0xd3 [kernel]",
"hva_to_pfn+0xfb [kvm]",
"kvm_faultin_pfn+0x91 [kvm]",
"direct_page_fault+0x22c [kvm]",
"kvm_mmu_page_fault+0x10d [kvm]",
"vmx_handle_exit+0x16b [kvm_intel]",
"vcpu_enter_guest+0xa40 [kvm]",
"kvm_arch_vcpu_ioctl_run+0x266 [kvm]",
"kvm_vcpu_ioctl+0x27d [kvm]",
"__x64_sys_ioctl+0x88 [kernel]",
"do_syscall_64+0x38 [kernel]",
"entry_SYSCALL_64_after_hwframe+0x63 [kernel]",
""
],
"addresses": [
"0xffffffff81cd0c35",
"0xffffffff812559e3",
"0xffffffff81255c2a",
"0xffffffff812718c7",
"0xffffffff81211ea6",
"0xffffffffa0018192",
"0xffffffffa09a0052",
"0xffffffff81cd0cb5",
"0xffffffff810e1013",
"0xffffffff810ea3b9",
"0xffffffff8116ad9e",
"0xffffffff8116b057",
"0xffffffff8116bc38",
"0xffffffff8106eb67",
"0xffffffff81cbb13d",
"0xffffffff81e00e46",
"0xffffffff81082c74",
"0xffffffff810833eb",
"0xffffffff810067dd",
"0xffffffff81291115",
"0xffffffff81271bf9",
"0xffffffff81211ea6",
"0xffffffffa0017b6a",
"0xffffffffa099c04f",
"0xffffffff81cd0e95",
"0xffffffff8136847e",
"0xffffffff812af818",
"0xffffffff812af9cc",
"0xffffffff812c07a4",
"0xffffffff812c0b96",
"0xffffffff812e3aa1",
"0xffffffff812e4a1c",
"0xffffffff812e8fe8",
"0xffffffff812e9ac7",
"0xffffffff812dbeb1",
"0xffffffff812dc3d3",
"0xffffffffa0e6196b",
"0xffffffffa0eaf961",
"0xffffffffa0eb631c",
"0xffffffffa0eb6b1d",
"0xffffffffa0df976b",
"0xffffffffa0e843f0",
"0xffffffffa0e863b6",
"0xffffffffa0e5c11d",
"0xffffffff813a0ea8",
"0xffffffff81cb7148",
"0xffffffff81e0009b"
]
},
I did manually add tracepoints/kfunc probes for some interrupt handlers like this:
BPF_PERCPU_ARRAY(irq_flags, u64, 1);
// Function to mark the start of interrupt handling
TRACEPOINT_PROBE(irq, irq_handler_entry) {
int cpu = bpf_get_smp_processor_id();
u64 val = 1; // Mark as in-interrupt
irq_flags.update(&cpu, &val);
return 0;
}
// Function to mark the end of interrupt handling
TRACEPOINT_PROBE(irq, irq_handler_exit) {
int cpu = bpf_get_smp_processor_id();
u64 val = 0; // Mark as not in-interrupt
irq_flags.update(&cpu, &val);
return 0;
}
// Function to mark the start of interrupt handling
TRACEPOINT_PROBE(irq, softirq_entry) {
int cpu = bpf_get_smp_processor_id();
u64 val = 1; // Mark as in-interrupt
irq_flags.update(&cpu, &val);
return 0;
}
// Function to mark the end of interrupt handling
TRACEPOINT_PROBE(irq, softirq_exit) {
int cpu = bpf_get_smp_processor_id();
u64 val = 0; // Mark as not in-interrupt
irq_flags.update(&cpu, &val);
return 0;
}
static void handle_hrtimer_interrupt_enter() {
int cpu = bpf_get_smp_processor_id();
u64 val = 1; // Mark as in-interrupt
irq_flags.update(&cpu, &val);
}
// The probe for hrtimer interrupt entry
KFUNC_PROBE(hrtimer_interrupt) {
handle_hrtimer_interrupt_enter();
return 0;
}
static void handle_hrtimer_interrupt_exit() {
int cpu = bpf_get_smp_processor_id();
u64 val = 0; // Mark as not in-interrupt
irq_flags.update(&cpu, &val);
}
// The probe for hrtimer interrupt exit
KRETFUNC_PROBE(hrtimer_interrupt) {
handle_hrtimer_interrupt_exit();
return 0;
}
static void handle_sysvec_apic_timer_interrupt_enter() {
int cpu = bpf_get_smp_processor_id();
u64 val = 1; // Mark as in-interrupt
irq_flags.update(&cpu, &val);
}
KFUNC_PROBE(__sysvec_apic_timer_interrupt) {
handle_sysvec_apic_timer_interrupt_enter();
return 0;
}
static void handle_sysvec_apic_timer_interrupt_exit() {
int cpu = bpf_get_smp_processor_id();
u64 val = 0; // Mark as not in-interrupt
irq_flags.update(&cpu, &val);
}
KRETFUNC_PROBE(__sysvec_apic_timer_interrupt) {
handle_sysvec_apic_timer_interrupt_exit();
return 0;
}
static int lock__NAME_(struct pt_regs *ctx, LOCK_TYPE *lock)
{
u32 current_pid = bpf_get_current_pid_tgid();
// Check if we're in an interrupt context
int cpu = bpf_get_smp_processor_id();
u64 *flag = irq_flags.lookup(&cpu);
if (flag && *flag == 1) {
//bpf_trace_printk("interrupt context\\n");
return 0; // In-interrupt, skip stack trace capture
}
....
}
But I do not think it's doing anything. Do you have any other suggestions?
@yonghong-song Can you point me to any useful guide to create a helper function to access context?
with libbpf the following works:
#include <vmlinux.h>
extern const struct pcpu_hot pcpu_hot __ksym __weak;
SEC("syscall")
int pcpu_hot_test(void *ctx)
{
struct pcpu_hot *p = bpf_this_cpu_ptr(&pcpu_hot);
bpf_printk("pc %x\n", p->preempt_count);
return 0;
}
Then add your own on top:
# define irq_count() (preempt_count() & (NMI_MASK | HARDIRQ_MASK | SOFTIRQ_MASK))
#define in_interrupt() (irq_count())
Hi,
I am using an extended version of klockstat tool to trace spin locks but because of interrupts, the stack traces I am getting are quite messy. Ideally, I would like to trace only in the process context. Is there a way to do this in eBPF currently? I tried using
in_interrupt
but I do not think its supported. Can anyone suggest any workaround for this? Thanks in advance!