Edit File: plumbing_helpers.h
// SPDX-License-Identifier: GPL-2.0-only OR MIT
/*

Copyright (C) 2023 The Falco Authors.

This file is dual licensed under either the MIT or GPL 2. See MIT.txt
or GPL2.txt for full copies of the license.

*/
#ifndef __PLUMBING_HELPERS_H
#define __PLUMBING_HELPERS_H

#include <linux/ptrace.h>
#include <linux/version.h>
#include <linux/fdtable.h>

#include "types.h"
#include "builtins.h"
#include "socketcall_to_syscall.h"

#define _READ(P)                                                \
    ({                                                          \
        typeof(P) _val;                                         \
        bpf_probe_read_kernel(&_val, sizeof(_val), &P);         \
        _val;                                                   \
    })
#define _READ_KERNEL(P) _READ(P)
#define _READ_USER(P)                                           \
    ({                                                          \
        typeof(P) _val;                                         \
        bpf_probe_read_user(&_val, sizeof(_val), &P);           \
        _val;                                                   \
    })

#ifdef BPF_DEBUG
#define bpf_printk(fmt, ...)                                    \
    do {                                                        \
        char s[] = fmt;                                         \
        bpf_trace_printk(s, sizeof(s), ##__VA_ARGS__);          \
    } while(0)
#else
#define bpf_printk(fmt, ...)
#endif

#ifndef BPF_SUPPORTS_RAW_TRACEPOINTS
static __always_inline int __stash_args(unsigned long long id, unsigned long *args) {
    int ret = bpf_map_update_elem(&stash_map, &id, args, BPF_ANY);

    if(ret)
        bpf_printk("error stashing arguments for %d:%d\n", id, ret);

    return ret;
}

static __always_inline int stash_args(unsigned long *args) {
    unsigned long long id = bpf_get_current_pid_tgid() & 0xffffffff;

    return __stash_args(id, args);
}

static __always_inline unsigned long *__unstash_args(unsigned long long id) {
    struct sys_stash_args *args;

    args = bpf_map_lookup_elem(&stash_map, &id);
    if(!args)
        return NULL;

    return args->args;
}

static __always_inline unsigned long *unstash_args(void) {
    unsigned long long id = bpf_get_current_pid_tgid() & 0xffffffff;

    return __unstash_args(id);
}

static __always_inline void delete_args(void) {
    unsigned long long id = bpf_get_current_pid_tgid() & 0xffffffff;

    bpf_map_delete_elem(&stash_map, &id);
}
#endif
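/* A minimal usage sketch of the stash helpers above (hypothetical
 * enter/exit handlers, shown only as an illustration): on kernels without
 * raw tracepoints the enter and exit tracepoints receive different
 * payloads, so the enter-side arguments are stashed per-thread in
 * stash_map and read back at exit time.
 *
 *    // sys_enter: save the six argument registers, keyed by tid.
 *    stash_args(enter_args->args);
 *
 *    // sys_exit: fetch them back, then drop the map entry.
 *    unsigned long *argv = unstash_args();
 *    if(argv) {
 *        // ... consume argv[0..5] ...
 *    }
 *    delete_args();
 */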
/* Can be called just from an exit event */
static __always_inline long bpf_syscall_get_retval(void *ctx) {
    struct sys_exit_args *args = (struct sys_exit_args *)ctx;

    return args->ret;
}

static __always_inline bool bpf_in_ia32_syscall() {
    struct task_struct *task = (struct task_struct *)bpf_get_current_task();
    uint32_t status = 0;

#ifndef CONFIG_THREAD_INFO_IN_TASK
    // If task_struct has no embedded thread_info, we cannot deduce
    // anything. Just return.
    // NOTE: this means that emulated 32-bit syscalls will be parsed as
    // 64-bit syscalls. However, our minimum supported kernel releases
    // already enforce that CONFIG_THREAD_INFO_IN_TASK is defined, so we
    // already show a warning to the user when building against an
    // unsupported kernel release.
#warning "bpf_in_ia32_syscall() support disabled since CONFIG_THREAD_INFO_IN_TASK is undefined."
    return false;
#elif defined(CONFIG_X86_64)
    /* The `status` field moved back and forth between thread_struct and
     * thread_info across these releases. */
#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 14, 18)
    status = _READ(task->thread.status);
#elif LINUX_VERSION_CODE < KERNEL_VERSION(4, 15, 0)
    status = _READ(task->thread_info.status);
#elif LINUX_VERSION_CODE < KERNEL_VERSION(4, 15, 2)
    status = _READ(task->thread.status);
#else
    status = _READ(task->thread_info.status);
#endif /* LINUX_VERSION_CODE < KERNEL_VERSION(4, 14, 18) */
    /* See here for the definition:
     * https://github.com/torvalds/linux/blob/69cb6c6556ad89620547318439d6be8bb1629a5a/arch/x86/include/asm/thread_info.h#L212
     */
    return status & TS_COMPAT;
#elif defined(CONFIG_ARM64)
    /* See here for the definition:
     * https://github.com/torvalds/linux/blob/69cb6c6556ad89620547318439d6be8bb1629a5a/arch/arm64/include/asm/thread_info.h#L99
     */
    status = _READ(task->thread_info.flags);
    return status & _TIF_32BIT;
#elif defined(CONFIG_S390)
    /* See here for the definition:
     * https://github.com/torvalds/linux/blob/69cb6c6556ad89620547318439d6be8bb1629a5a/arch/s390/include/asm/thread_info.h#L101
     */
    status = _READ(task->thread_info.flags);
    return status & _TIF_31BIT;
#elif defined(CONFIG_PPC64)
    /* See here for the definition:
     * https://github.com/torvalds/linux/blob/9b6de136b5f0158c60844f85286a593cb70fb364/arch/powerpc/include/asm/thread_info.h#L127
     */
    status = _READ(task->thread_info.flags);
    return status & _TIF_32BIT;
#else
    /* Unknown architecture. */
    return false;
#endif /* CONFIG_X86_64 */
}

/* Can be called from both enter and exit event, id is at the same
 * offset in both struct sys_enter_args and struct sys_exit_args
 */
static __always_inline long bpf_syscall_get_nr(void *ctx) {
    struct sys_enter_args *args = (struct sys_enter_args *)ctx;
    long id = 0;

#ifdef BPF_SUPPORTS_RAW_TRACEPOINTS
    struct pt_regs *regs = (struct pt_regs *)args->regs;

#ifdef CONFIG_X86_64
    /* See here for the definition:
     * https://github.com/torvalds/linux/blob/69cb6c6556ad89620547318439d6be8bb1629a5a/arch/x86/include/asm/syscall.h#L40
     */
    id = _READ(regs->orig_ax);
#elif defined(CONFIG_ARM64)
    /* See here for the definition:
     * https://github.com/torvalds/linux/blob/69cb6c6556ad89620547318439d6be8bb1629a5a/arch/arm64/include/asm/syscall.h#L23
     */
    id = _READ(regs->syscallno);
#elif defined(CONFIG_S390)
    /* See here for the definition:
     * https://github.com/torvalds/linux/blob/69cb6c6556ad89620547318439d6be8bb1629a5a/arch/s390/include/asm/syscall.h#L24
     */
    id = _READ(regs->int_code);
    id = id & 0xffff;
#elif defined(CONFIG_PPC64)
    /* See here for the definition:
     * https://github.com/torvalds/linux/blob/f1a09972a45ae63efbd1587337c4be13b1893330/arch/powerpc/include/asm/syscall.h#L37
     */
    id = _READ(regs->gpr[0]);
#endif /* CONFIG_X86_64 */

#else
    id = args->id;
#endif /* BPF_SUPPORTS_RAW_TRACEPOINTS */

    return id;
}

#ifndef BPF_SUPPORTS_RAW_TRACEPOINTS
static __always_inline unsigned long bpf_syscall_get_argument_from_args(unsigned long *args,
                                                                        int idx) {
    unsigned long arg = 0;

    if(idx <= 5)
        arg = args[idx];

    return arg;
}
#endif
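/* Worked example for the per-architecture conventions handled below: for
 * write(fd, buf, count) on x86_64 the kernel syscall ABI places
 *
 *    idx 0 (fd)    in regs->di (native)  /  regs->bx (ia32 emulation)
 *    idx 1 (buf)   in regs->si (native)  /  regs->cx (ia32 emulation)
 *    idx 2 (count) in regs->dx (native)  /  regs->dx (ia32 emulation)
 *
 * which is exactly the mapping implemented by
 * bpf_syscall_get_argument_from_ctx() below.
 */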
static __always_inline unsigned long bpf_syscall_get_argument_from_ctx(void *ctx, int idx) {
    unsigned long arg = 0;

#ifdef BPF_SUPPORTS_RAW_TRACEPOINTS
    struct sys_enter_args *args = (struct sys_enter_args *)ctx;
    struct pt_regs *regs = (struct pt_regs *)args->regs;

#ifdef CONFIG_X86_64
    if(bpf_in_ia32_syscall()) {
        switch(idx) {
        case 0:
            arg = _READ(regs->bx);
            break;
        case 1:
            arg = _READ(regs->cx);
            break;
        case 2:
            arg = _READ(regs->dx);
            break;
        case 3:
            arg = _READ(regs->si);
            break;
        case 4:
            arg = _READ(regs->di);
            break;
        case 5:
            arg = _READ(regs->bp);
            break;
        default:
            arg = 0;
        }
        return arg;
    }

    /* See here for the definition:
     * https://github.com/libbpf/libbpf/blob/master/src/bpf_tracing.h#L75-L87
     */
    switch(idx) {
    case 0:
        arg = _READ(regs->di);
        break;
    case 1:
        arg = _READ(regs->si);
        break;
    case 2:
        arg = _READ(regs->dx);
        break;
    case 3:
        arg = _READ(regs->r10);
        break;
    case 4:
        arg = _READ(regs->r8);
        break;
    case 5:
        arg = _READ(regs->r9);
        break;
    default:
        arg = 0;
    }
#elif defined(CONFIG_ARM64)
    /* See here for the definition:
     * https://github.com/libbpf/libbpf/blob/master/src/bpf_tracing.h#L166-L178
     */
    struct user_pt_regs *user_regs = (struct user_pt_regs *)args->regs;
    switch(idx) {
    case 0:
        arg = _READ(regs->orig_x0);
        break;
    case 1:
    case 2:
    case 3:
    case 4:
    case 5:
        arg = _READ(user_regs->regs[idx]);
        break;
    default:
        arg = 0;
    }
#elif defined(CONFIG_S390)
    /* See here for the definition:
     * https://github.com/libbpf/libbpf/blob/master/src/bpf_tracing.h#L132-L144
     */
    user_pt_regs *user_regs = (user_pt_regs *)args->regs;
    switch(idx) {
    case 0:
        arg = _READ(regs->orig_gpr2);
        break;
    case 1:
    case 2:
    case 3:
    case 4:
    case 5:
        arg = _READ(user_regs->gprs[idx + 2]);
        break;
    default:
        arg = 0;
    }
#elif defined(CONFIG_PPC64)
    /* See here for the definition:
     * https://github.com/libbpf/libbpf/blob/master/src/bpf_tracing.h#L290-L306
     */
    switch(idx) {
    case 0:
        arg = _READ(regs->orig_gpr3);
        break;
    case 1:
    case 2:
    case 3:
    case 4:
    case 5:
        arg = _READ(regs->gpr[idx + 3]);
        break;
    default:
        arg = 0;
    }
#endif /* CONFIG_X86_64 */

#else
    unsigned long *args = unstash_args();

    if(args)
        arg = bpf_syscall_get_argument_from_args(args, idx);
    else
        arg = 0;
#endif /* BPF_SUPPORTS_RAW_TRACEPOINTS */

    return arg;
}

static __always_inline unsigned long bpf_syscall_get_socketcall_arg(void *ctx, int idx) {
    unsigned long arg = 0;
    unsigned long args_pointer = 0;

    /* socketcall(int call, unsigned long *args): the second argument is a
     * pointer to the real syscall arguments in user memory. */
    args_pointer = bpf_syscall_get_argument_from_ctx(ctx, 1);
    if(bpf_in_ia32_syscall()) {
        bpf_probe_read_user(&arg,
                            sizeof(uint32_t),
                            (void *)(args_pointer + (idx * sizeof(uint32_t))));
    } else {
        bpf_probe_read_user(&arg,
                            sizeof(unsigned long),
                            (void *)(args_pointer + (idx * sizeof(unsigned long))));
    }

    return arg;
}

static __always_inline unsigned long bpf_syscall_get_argument(struct filler_data *data, int idx) {
#ifdef BPF_SUPPORTS_RAW_TRACEPOINTS
    /* This check lives here because we support socket calls only on
     * kernels with BPF_SUPPORTS_RAW_TRACEPOINTS. Testing
     * `data->state->tail_ctx.socketcall_syscall_id != -1` first is just a
     * fast path to avoid calling bpf_syscall_get_nr() for events that
     * cannot be socket calls.
     */
    if(data->state->tail_ctx.socketcall_syscall_id != -1 &&
       bpf_syscall_get_nr(data->ctx) == data->state->tail_ctx.socketcall_syscall_id) {
        return bpf_syscall_get_socketcall_arg(data->ctx, idx);
    }
    return bpf_syscall_get_argument_from_ctx(data->ctx, idx);
#else
    return bpf_syscall_get_argument_from_args(data->args, idx);
#endif
}
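/* Worked example for the socketcall decoding above: a 32-bit task invoking
 * connect(fd, addr, addrlen) as socketcall(SYS_CONNECT, args) passes the
 * real arguments as an array of uint32_t in user memory, so idx 1 (addr)
 * is read from args_pointer + 1 * sizeof(uint32_t); for a 64-bit task the
 * same block is an array of unsigned long. This is why
 * bpf_syscall_get_socketcall_arg() branches on bpf_in_ia32_syscall().
 */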
static __always_inline char *get_frame_scratch_area(unsigned int cpu) {
    char *scratchp;

    scratchp = bpf_map_lookup_elem(&frame_scratch_map, &cpu);
    if(!scratchp)
        bpf_printk("frame scratch NULL\n");

    return scratchp;
}

static __always_inline char *get_tmp_scratch_area(unsigned int cpu) {
    char *scratchp;

    scratchp = bpf_map_lookup_elem(&tmp_scratch_map, &cpu);
    if(!scratchp)
        bpf_printk("tmp scratch NULL\n");

    return scratchp;
}

static __always_inline const struct syscall_evt_pair *get_syscall_info(int id) {
    const struct syscall_evt_pair *p = bpf_map_lookup_elem(&syscall_table, &id);

    if(!p)
        bpf_printk("no syscall_info for %d\n", id);

    return p;
}

static __always_inline bool is_syscall_interesting(int id) {
    bool *enabled = bpf_map_lookup_elem(&interesting_syscalls_table, &id);

    if(!enabled) {
        bpf_printk("no interesting_syscalls_table entry for %d\n", id);
        return false;
    }

    return *enabled;
}

static __always_inline int convert_ia32_to_64(int id) {
    int *x64_id = bpf_map_lookup_elem(&ia32_64_map, &id);

    if(!x64_id) {
        bpf_printk("no 64bit mapped value for %d\n", id);
        return -1;
    }

    return *x64_id;
}

static __always_inline const struct ppm_event_info *get_event_info(ppm_event_code event_type) {
    const struct ppm_event_info *e = bpf_map_lookup_elem(&event_info_table, &event_type);

    if(!e)
        bpf_printk("no event info for %d\n", event_type);

    return e;
}

static __always_inline const struct ppm_event_entry *get_event_filler_info(
        ppm_event_code event_type) {
    const struct ppm_event_entry *e;

    e = bpf_map_lookup_elem(&fillers_table, &event_type);
    if(!e)
        bpf_printk("no filler info for %d\n", event_type);

    return e;
}

static __always_inline struct scap_bpf_settings *get_bpf_settings(void) {
    struct scap_bpf_settings *settings;
    int id = 0;

    settings = bpf_map_lookup_elem(&settings_map, &id);
    if(!settings)
        bpf_printk("settings NULL\n");

    return settings;
}

static __always_inline struct scap_bpf_per_cpu_state *get_local_state(unsigned int cpu) {
    struct scap_bpf_per_cpu_state *state;

    state = bpf_map_lookup_elem(&local_state_map, &cpu);
    if(!state)
        bpf_printk("state NULL\n");

    return state;
}

static __always_inline bool acquire_local_state(struct scap_bpf_per_cpu_state *state) {
    if(state->in_use) {
        bpf_printk("acquire_local_state: already in use\n");
        return false;
    }

    state->in_use = true;
    return true;
}

static __always_inline bool release_local_state(struct scap_bpf_per_cpu_state *state) {
    if(!state->in_use) {
        bpf_printk("release_local_state: already not in use\n");
        return false;
    }

    state->in_use = false;
    return true;
}

static __always_inline int init_filler_data(void *ctx, struct filler_data *data, bool is_syscall) {
    unsigned int cpu;

    data->ctx = ctx;

    data->settings = get_bpf_settings();
    if(!data->settings)
        return PPM_FAILURE_BUG;

    cpu = bpf_get_smp_processor_id();

    data->buf = get_frame_scratch_area(cpu);
    if(!data->buf)
        return PPM_FAILURE_BUG;

    data->state = get_local_state(cpu);
    if(!data->state)
        return PPM_FAILURE_BUG;

    data->tmp_scratch = get_tmp_scratch_area(cpu);
    if(!data->tmp_scratch)
        return PPM_FAILURE_BUG;

    data->evt = get_event_info(data->state->tail_ctx.evt_type);
    if(!data->evt)
        return PPM_FAILURE_BUG;

    data->filler_info = get_event_filler_info(data->state->tail_ctx.evt_type);
    if(!data->filler_info)
        return PPM_FAILURE_BUG;

#ifndef BPF_SUPPORTS_RAW_TRACEPOINTS
    if(is_syscall) {
        data->args = unstash_args();
        if(!data->args)
            return PPM_SKIP_EVENT;
    }
#endif

    data->curarg_already_on_frame = false;
    data->fd = -1;

    return PPM_SUCCESS;
}

static __always_inline int bpf_test_bit(int nr, unsigned long *addr) {
    return 1UL & (_READ(addr[BIT_WORD(nr)]) >> (nr & (BITS_PER_LONG - 1)));
}
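/* Worked example for bpf_test_bit() above, assuming BITS_PER_LONG == 64:
 * testing fd 70 in an open_fds bitmap reads word BIT_WORD(70) == 1 and
 * shifts by 70 & 63 == 6, i.e. it evaluates (addr[1] >> 6) & 1UL.
 */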
#if defined(CAPTURE_SCHED_PROC_FORK) || defined(CAPTURE_SCHED_PROC_EXEC)
static __always_inline bool bpf_drop_syscall_exit_events(void *ctx, ppm_event_code evt_type) {
    long ret = 0;
    switch(evt_type) {
    /* On s390x, clone and fork child events will be generated but,
     * due to page faults, no args/envp information will be collected.
     * Also, no child events appear for the clone3 syscall.
     *
     * Because child events are covered by CAPTURE_SCHED_PROC_FORK,
     * let's proactively ignore them.
     */
#ifdef CAPTURE_SCHED_PROC_FORK
    case PPME_SYSCALL_CLONE_20_X:
    case PPME_SYSCALL_FORK_20_X:
    case PPME_SYSCALL_VFORK_20_X:
    case PPME_SYSCALL_CLONE3_X:
        ret = bpf_syscall_get_retval(ctx);
        /* We ignore only child events, so ret == 0! */
        return ret == 0;
#endif

    /* If the `CAPTURE_SCHED_PROC_EXEC` logic is enabled we collect
     * execve-family exit events through a dedicated tracepoint, so we can
     * ignore them here.
     */
#ifdef CAPTURE_SCHED_PROC_EXEC
    case PPME_SYSCALL_EXECVE_19_X:
    case PPME_SYSCALL_EXECVEAT_X:
        ret = bpf_syscall_get_retval(ctx);
        /* We ignore only successful events, so ret == 0! */
        return ret == 0;
#endif

    default:
        break;
    }
    return false;
}
#endif

static __always_inline bool drop_event(void *ctx,
                                       struct scap_bpf_per_cpu_state *state,
                                       ppm_event_code evt_type,
                                       struct scap_bpf_settings *settings,
                                       enum syscall_flags drop_flags) {
    if(!settings->dropping_mode)
        return false;

    switch(evt_type) {
    case PPME_SYSCALL_CLOSE_X:
    case PPME_SOCKET_BIND_X: {
        long ret = bpf_syscall_get_retval(ctx);

        if(ret < 0)
            return true;

        break;
    }
    case PPME_SYSCALL_CLOSE_E: {
        struct files_struct *files;
        struct task_struct *task;
        unsigned long *open_fds;
        struct fdtable *fdt;
        int close_fd;
        int max_fds;

        close_fd = bpf_syscall_get_argument_from_ctx(ctx, 0);
        if(close_fd < 0)
            return true;

        task = (struct task_struct *)bpf_get_current_task();
        if(!task)
            break;

        files = _READ(task->files);
        if(!files)
            break;

        fdt = _READ(files->fdt);
        if(!fdt)
            break;

        max_fds = _READ(fdt->max_fds);
        if(close_fd >= max_fds)
            return true;

        open_fds = _READ(fdt->open_fds);
        if(!open_fds)
            break;

        if(!bpf_test_bit(close_fd, open_fds))
            return true;

        break;
    }
    case PPME_SYSCALL_FCNTL_E:
    case PPME_SYSCALL_FCNTL_X: {
        long cmd = bpf_syscall_get_argument_from_ctx(ctx, 1);

        if(cmd != F_DUPFD && cmd != F_DUPFD_CLOEXEC)
            return true;

        break;
    }
    default:
        break;
    }

    if(drop_flags & UF_NEVER_DROP)
        return false;

    if(drop_flags & UF_ALWAYS_DROP)
        return true;

    if(state->tail_ctx.ts % 1000000000 >= 1000000000 / settings->sampling_ratio) {
        if(!settings->is_dropping) {
            settings->is_dropping = true;
            state->tail_ctx.evt_type = PPME_DROP_E;
            return false;
        }

        return true;
    }

    if(settings->is_dropping) {
        settings->is_dropping = false;
        state->tail_ctx.evt_type = PPME_DROP_X;
        return false;
    }

    return false;
}

static __always_inline void reset_tail_ctx(struct scap_bpf_per_cpu_state *state,
                                           ppm_event_code evt_type,
                                           unsigned long long ts) {
    state->tail_ctx.evt_type = evt_type;
    state->tail_ctx.ts = ts;
    state->tail_ctx.curarg = 0;
    state->tail_ctx.curoff = 0;
    state->tail_ctx.len = 0;
    state->tail_ctx.prev_res = 0;
}

static __always_inline void call_filler(void *ctx,
                                        void *stack_ctx,
                                        ppm_event_code evt_type,
                                        enum syscall_flags drop_flags,
                                        int socketcall_syscall_id) {
    struct scap_bpf_settings *settings;
    const struct ppm_event_entry *filler_info;
    struct scap_bpf_per_cpu_state *state;
    unsigned long long ts;
    unsigned int cpu;

    cpu = bpf_get_smp_processor_id();

    state = get_local_state(cpu);
    if(!state)
        return;

    settings = get_bpf_settings();
    if(!settings)
        return;

    if(!acquire_local_state(state))
        return;

    if(cpu == 0 && state->hotplug_cpu != 0) {
        evt_type = PPME_CPU_HOTPLUG_E;
        drop_flags = UF_NEVER_DROP;
    }

    ts = settings->boot_time + bpf_ktime_get_boot_ns();
    reset_tail_ctx(state, evt_type, ts);

    /* drop_event can change state->tail_ctx.evt_type */
    if(drop_event(stack_ctx, state, evt_type, settings, drop_flags))
        goto cleanup;

    ++state->n_evts;

    state->tail_ctx.socketcall_syscall_id = socketcall_syscall_id;

    filler_info = get_event_filler_info(state->tail_ctx.evt_type);
    if(!filler_info)
        goto cleanup;

    bpf_tail_call(ctx, &tail_map, filler_info->filler_id);
    bpf_printk("Can't tail call filler evt=%d, filler=%d\n",
               state->tail_ctx.evt_type,
               filler_info->filler_id);

cleanup:
    release_local_state(state);
}
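/* Worked example for the time-based sampling in drop_event() above: with
 * sampling_ratio == 4, only events whose timestamp falls within the first
 * 1000000000 / 4 = 250 ms of each second are kept; the remaining 750 ms
 * are dropped. A single PPME_DROP_E event marks the transition into the
 * dropping window and a PPME_DROP_X event marks the transition out of it.
 */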
#ifdef BPF_SUPPORTS_RAW_TRACEPOINTS
static __always_inline long convert_network_syscalls(void *ctx, bool *is_syscall) {
    int socketcall_id = (int)bpf_syscall_get_argument_from_ctx(ctx, 0);
    return socketcall_code_to_syscall_code(socketcall_id, is_syscall);
}
#endif

#endif /* __PLUMBING_HELPERS_H */
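/* End-to-end sketch of how these helpers fit together on a raw-tracepoint
 * kernel (hypothetical sys_enter handler; the entry point, the
 * __NR_socketcall check, and the syscall_evt_pair lookup are assumptions
 * for illustration, not part of this header):
 *
 *    int bpf_sys_enter(struct sys_enter_args *ctx) {
 *        bool is_syscall = false;
 *        int socketcall_id = -1;
 *        long id = bpf_syscall_get_nr(ctx);
 *
 *        if(id == __NR_socketcall) {
 *            // Remember the socketcall nr so bpf_syscall_get_argument()
 *            // later decodes arguments from the user-memory args block.
 *            socketcall_id = id;
 *            id = convert_network_syscalls(ctx, &is_syscall);
 *        }
 *        if(id < 0 || !is_syscall_interesting(id))
 *            return 0;
 *
 *        const struct syscall_evt_pair *pair = get_syscall_info(id);
 *        if(pair)
 *            call_filler(ctx, ctx, pair->enter_event_type, pair->flags,
 *                        socketcall_id);
 *        return 0;
 *    }
 */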