ebpf
Compare original and translation side by side
🇺🇸
Original
English🇨🇳
Translation
ChineseeBPF
eBPF
Purpose
用途
Guide agents through writing, loading, and debugging eBPF programs using libbpf, bpftrace, and bpftool. Covers map types, program types, verifier errors, XDP networking, and CO-RE portability.
指导开发者使用libbpf、bpftrace和bpftool编写、加载及调试eBPF程序。涵盖映射表类型、程序类型、验证器错误、XDP网络以及CO-RE可移植性等内容。
Triggers
触发场景
- "How do I write an eBPF program to trace system calls?"
- "My eBPF program fails with a verifier error"
- "How do I use bpftrace to trace kernel events?"
- "How do I share data between kernel eBPF and userspace?"
- "How do I write an XDP program for packet filtering?"
- "How do I make my eBPF program portable across kernel versions (CO-RE)?"
- "如何编写用于追踪系统调用的eBPF程序?"
- "我的eBPF程序因验证器错误运行失败"
- "如何使用bpftrace追踪内核事件?"
- "如何在内核态eBPF与用户态之间共享数据?"
- "如何编写用于数据包过滤的XDP程序?"
- "如何让我的eBPF程序实现跨内核版本的可移植性(CO-RE)?"
Workflow
操作流程
1. Choose the right tool
1. 选择合适的工具
Goal?
├── One-liner kernel tracing / scripting → bpftrace
├── Production eBPF program with userspace → libbpf (C) or aya (Rust)
├── Inspect loaded programs and maps → bpftool
└── High-performance packet processing → XDP + libbpf
Goal?
├── One-liner kernel tracing / scripting → bpftrace
├── Production eBPF program with userspace → libbpf (C) or aya (Rust)
├── Inspect loaded programs and maps → bpftool
└── High-performance packet processing → XDP + libbpf
2. bpftrace — quick kernel tracing
2. bpftrace — 快速内核追踪
bash
undefinedbash
undefinedTrace all execve calls with comm and args
Trace all execve calls with comm and args
bpftrace -e 'tracepoint:syscalls:sys_enter_execve { printf("%s %s\n", comm, str(args->filename)); }'
bpftrace -e 'tracepoint:syscalls:sys_enter_execve { printf("%s %s\n", comm, str(args->filename)); }'
Count syscalls by process
Count syscalls by process
bpftrace -e 'tracepoint:raw_syscalls:sys_enter { @[comm] = count(); }'
bpftrace -e 'tracepoint:raw_syscalls:sys_enter { @[comm] = count(); }'
Latency histogram for read() syscall
Latency histogram for read() syscall
bpftrace -e '
tracepoint:syscalls:sys_enter_read { @start[tid] = nsecs; }
tracepoint:syscalls:sys_exit_read { @us = hist((nsecs - @start[tid]) / 1000); delete(@start[tid]); }'
bpftrace -e '
tracepoint:syscalls:sys_enter_read { @start[tid] = nsecs; }
tracepoint:syscalls:sys_exit_read { @us = hist((nsecs - @start[tid]) / 1000); delete(@start[tid]); }'
List available tracepoints
List available tracepoints
bpftrace -l 'tracepoint:syscalls:'
bpftrace -l 'kprobe:tcp_'
undefinedbpftrace -l 'tracepoint:syscalls:'
bpftrace -l 'kprobe:tcp_'
undefined3. libbpf skeleton — minimal C program
3. libbpf骨架 — 极简C程序
c
// counter.bpf.c — kernel-side
#include <vmlinux.h>
#include <bpf/bpf_helpers.h>
/*
 * call_count: BTF-style map definition placed in the ".maps" section.
 * It is a hash map with a u32 key and a u64 value, holding at most 1024
 * entries. trace_read() uses the upper 32 bits of
 * bpf_get_current_pid_tgid() as the key and the value as a running count.
 */
struct {
__uint(type, BPF_MAP_TYPE_HASH); /* hash map */
__type(key, u32); /* key type */
__type(value, u64); /* value type */
__uint(max_entries, 1024); /* capacity limit */
} call_count SEC(".maps");
/*
 * Count read() syscalls per process.
 *
 * Attached to the sys_enter_read tracepoint. The map key is the upper
 * 32 bits of bpf_get_current_pid_tgid() (the thread-group ID, i.e. the
 * userspace PID); the value is the number of read() entries seen.
 *
 * The program can run on several CPUs at once for the same key, so the
 * increment must be atomic and the first insert must not clobber a value
 * another CPU has just created.
 *
 * Always returns 0.
 */
SEC("tracepoint/syscalls/sys_enter_read")
int trace_read(struct trace_event_raw_sys_enter *ctx)
{
    u32 pid = bpf_get_current_pid_tgid() >> 32;
    u64 one = 1;
    u64 *cnt;

    cnt = bpf_map_lookup_elem(&call_count, &pid);
    if (!cnt) {
        /*
         * First event for this PID. BPF_NOEXIST makes the insert fail
         * instead of overwriting if another CPU created the entry
         * between our lookup and this update.
         */
        if (bpf_map_update_elem(&call_count, &pid, &one, BPF_NOEXIST) == 0)
            return 0;

        /* Lost the race (or the map is full): look the entry up again. */
        cnt = bpf_map_lookup_elem(&call_count, &pid);
        if (!cnt)
            return 0;
    }

    /* Atomic add: a plain (*cnt)++ loses updates under concurrency. */
    __sync_fetch_and_add(cnt, 1);
    return 0;
}
char LICENSE[] SEC("license") = "GPL";
c
// counter.c — userspace loader
#include <stdio.h>
#include "counter.skel.h"

/*
 * Open, load and attach the counter BPF program, then tear it down.
 *
 * Returns 0 on success and 1 if the skeleton could not be loaded or
 * attached. The skeleton is destroyed on every path that created it.
 */
int main(void) {
    /* open_and_load() returns NULL on failure; never pass that to attach. */
    struct counter_bpf *skel = counter_bpf__open_and_load();
    if (!skel) {
        fprintf(stderr, "failed to open and load BPF skeleton\n");
        return 1;
    }

    /* attach() returns 0 on success and a negative error code otherwise. */
    int err = counter_bpf__attach(skel);
    if (err) {
        fprintf(stderr, "failed to attach BPF program: %d\n", err);
        counter_bpf__destroy(skel);
        return 1;
    }

    // read map, print results

    counter_bpf__destroy(skel);
    return 0;
}
undefinedc
// counter.bpf.c — kernel-side
#include <vmlinux.h>
#include <bpf/bpf_helpers.h>
/*
 * call_count: BTF-style map definition placed in the ".maps" section.
 * It is a hash map with a u32 key and a u64 value, holding at most 1024
 * entries. trace_read() uses the upper 32 bits of
 * bpf_get_current_pid_tgid() as the key and the value as a running count.
 */
struct {
__uint(type, BPF_MAP_TYPE_HASH); /* hash map */
__type(key, u32); /* key type */
__type(value, u64); /* value type */
__uint(max_entries, 1024); /* capacity limit */
} call_count SEC(".maps");
/*
 * Count read() syscalls per process.
 *
 * Attached to the sys_enter_read tracepoint. The map key is the upper
 * 32 bits of bpf_get_current_pid_tgid() (the thread-group ID, i.e. the
 * userspace PID); the value is the number of read() entries seen.
 *
 * The program can run on several CPUs at once for the same key, so the
 * increment must be atomic and the first insert must not clobber a value
 * another CPU has just created.
 *
 * Always returns 0.
 */
SEC("tracepoint/syscalls/sys_enter_read")
int trace_read(struct trace_event_raw_sys_enter *ctx)
{
    u32 pid = bpf_get_current_pid_tgid() >> 32;
    u64 one = 1;
    u64 *cnt;

    cnt = bpf_map_lookup_elem(&call_count, &pid);
    if (!cnt) {
        /*
         * First event for this PID. BPF_NOEXIST makes the insert fail
         * instead of overwriting if another CPU created the entry
         * between our lookup and this update.
         */
        if (bpf_map_update_elem(&call_count, &pid, &one, BPF_NOEXIST) == 0)
            return 0;

        /* Lost the race (or the map is full): look the entry up again. */
        cnt = bpf_map_lookup_elem(&call_count, &pid);
        if (!cnt)
            return 0;
    }

    /* Atomic add: a plain (*cnt)++ loses updates under concurrency. */
    __sync_fetch_and_add(cnt, 1);
    return 0;
}
char LICENSE[] SEC("license") = "GPL";
c
// counter.c — userspace loader
#include <stdio.h>
#include "counter.skel.h"

/*
 * Open, load and attach the counter BPF program, then tear it down.
 *
 * Returns 0 on success and 1 if the skeleton could not be loaded or
 * attached. The skeleton is destroyed on every path that created it.
 */
int main(void) {
    /* open_and_load() returns NULL on failure; never pass that to attach. */
    struct counter_bpf *skel = counter_bpf__open_and_load();
    if (!skel) {
        fprintf(stderr, "failed to open and load BPF skeleton\n");
        return 1;
    }

    /* attach() returns 0 on success and a negative error code otherwise. */
    int err = counter_bpf__attach(skel);
    if (err) {
        fprintf(stderr, "failed to attach BPF program: %d\n", err);
        counter_bpf__destroy(skel);
        return 1;
    }

    // read map, print results

    counter_bpf__destroy(skel);
    return 0;
}
undefinedBuild with libbpf
Build with libbpf
clang -g -O2 -target bpf -D__TARGET_ARCH_x86 -I/usr/include/bpf \
  -c counter.bpf.c -o counter.bpf.o
bpftool gen skeleton counter.bpf.o > counter.skel.h
gcc -o counter counter.c -lbpf -lelf -lz
undefined
clang -g -O2 -target bpf -D__TARGET_ARCH_x86 -I/usr/include/bpf \
  -c counter.bpf.c -o counter.bpf.o
bpftool gen skeleton counter.bpf.o > counter.skel.h
gcc -o counter counter.c -lbpf -lelf -lz
undefined4. eBPF map types
4. eBPF映射表类型
| Map type | Key→Value | Use case |
|---|---|---|
| `BPF_MAP_TYPE_HASH` | arbitrary→arbitrary | Per-PID counters, state |
| `BPF_MAP_TYPE_ARRAY` | u32→fixed | Config, metrics indexed by CPU |
| `BPF_MAP_TYPE_PERCPU_HASH` | key→per-CPU val | High-frequency counters without locks |
| `BPF_MAP_TYPE_RINGBUF` | — | Efficient kernel→userspace events |
| `BPF_MAP_TYPE_PERF_EVENT_ARRAY` | — | Legacy perf event output |
| `BPF_MAP_TYPE_LRU_HASH` | key→val | Connection tracking, limited size |
| `BPF_MAP_TYPE_PROG_ARRAY` | u32→prog | Tail calls, program chaining |
| `BPF_MAP_TYPE_XSKMAP` | — | AF_XDP socket redirection |
Use `BPF_MAP_TYPE_RINGBUF` over `PERF_EVENT_ARRAY` for new code — lower overhead, variable-size records.
| 映射表类型 | 键→值 | 适用场景 |
|---|---|---|
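| `BPF_MAP_TYPE_HASH` | 任意类型→任意类型 | 按PID统计的计数器、状态存储 |
| `BPF_MAP_TYPE_ARRAY` | u32→固定类型 | 配置存储、按CPU索引的指标 |
| `BPF_MAP_TYPE_PERCPU_HASH` | 键→每CPU值 | 无锁的高频计数器 |
| `BPF_MAP_TYPE_RINGBUF` | — | 高效的内核→用户态事件传输 |
| `BPF_MAP_TYPE_PERF_EVENT_ARRAY` | — | 传统perf事件输出 |
| `BPF_MAP_TYPE_LRU_HASH` | 键→值 | 连接追踪、有限容量存储 |
| `BPF_MAP_TYPE_PROG_ARRAY` | u32→程序 | 尾调用、程序链式调用 |
| `BPF_MAP_TYPE_XSKMAP` | — | AF_XDP套接字重定向 |
新代码中优先使用 `BPF_MAP_TYPE_RINGBUF` 而非 `PERF_EVENT_ARRAY`——它的开销更低,支持可变大小的记录。
5. Verifier error triage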
5. 验证器错误排查
| Error message | Root cause | Fix |
|---|---|---|
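| `invalid mem access` | Dereferencing unbounded pointer | Check pointer with null test before use |
| `R0 !read_ok` | Return without setting R0 | Ensure all paths set a return value |
| `jump out of range` | Branch target beyond program end | Restructure conditionals |
| `back-edge from insn X to Y` | Backward jump (loop) | Use a bounded loop or the `bpf_loop()` helper |
| `unreachable insn` | Dead code after return | Remove dead branches |
| `invalid indirect read from stack` | Stack read of uninitialised bytes | Zero-init structs: `struct foo x = {};` |
| `misaligned stack access` | Pointer arithmetic off alignment | Align reads to the access size (e.g. 8-byte boundaries for `u64`) |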
bash
undefined| 错误信息 | 根本原因 | 修复方案 |
|---|---|---|
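| `invalid mem access` | 解引用未受限的指针 | 使用前先对指针进行空值检查 |
| `R0 !read_ok` | 返回时未设置R0寄存器 | 确保所有代码路径都设置了返回值 |
| `jump out of range` | 分支目标超出程序范围 | 重构条件语句 |
| `back-edge from insn X to Y` | 向后跳转(循环) | 使用有界循环或 `bpf_loop()` 辅助函数 |
| `unreachable insn` | 返回语句后存在死代码 | 删除无用分支 |
| `invalid indirect read from stack` | 读取栈上未初始化的字节 | 零初始化结构体:`struct foo x = {};` |
| `misaligned stack access` | 指针算术运算导致对齐错误 | 将读取操作对齐到访问大小(例如 `u64` 对齐到8字节边界) |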
bash
undefinedGet detailed verifier log
Get detailed verifier log
bpftool prog load prog.bpf.o /sys/fs/bpf/prog type kprobe \
  2>&1 | head -100
bpftool prog load prog.bpf.o /sys/fs/bpf/prog type kprobe \
  2>&1 | head -100
Check loaded programs
Check loaded programs
bpftool prog list
bpftool prog dump xlated id 42
undefinedbpftool prog list
bpftool prog dump xlated id 42
undefined6. XDP programs
6. XDP程序
c
// xdp_drop_icmp.bpf.c
#include <vmlinux.h>
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_endian.h>
/*
 * vmlinux.h is generated from BTF, which carries types and enum values
 * but no preprocessor macros. ETH_P_IP is a macro in the kernel headers,
 * so it has to be defined here for this file to compile.
 */
#ifndef ETH_P_IP
#define ETH_P_IP 0x0800 /* IPv4 EtherType */
#endif

/*
 * XDP filter that drops IPv4 ICMP packets and passes everything else.
 *
 * Every header is bounds-checked against data_end before it is read;
 * the verifier rejects packet accesses that are not proven in range.
 * Packets too short to hold the header being inspected are passed.
 *
 * Returns XDP_DROP for IPv4 packets whose protocol is ICMP, XDP_PASS
 * otherwise.
 */
SEC("xdp")
int xdp_filter(struct xdp_md *ctx)
{
    void *data_end = (void *)(long)ctx->data_end;
    void *data = (void *)(long)ctx->data;

    struct ethhdr *eth = data;
    if ((void *)(eth + 1) > data_end)
        return XDP_PASS;

    /* Swap the constant (folded at compile time), not the packet field. */
    if (eth->h_proto != bpf_htons(ETH_P_IP))
        return XDP_PASS;

    struct iphdr *ip = (void *)(eth + 1);
    if ((void *)(ip + 1) > data_end)
        return XDP_PASS;

    if (ip->protocol == IPPROTO_ICMP)
        return XDP_DROP;

    return XDP_PASS;
}
char LICENSE[] SEC("license") = "GPL";
bash
undefinedc
// xdp_drop_icmp.bpf.c
#include <vmlinux.h>
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_endian.h>
/*
 * vmlinux.h is generated from BTF, which carries types and enum values
 * but no preprocessor macros. ETH_P_IP is a macro in the kernel headers,
 * so it has to be defined here for this file to compile.
 */
#ifndef ETH_P_IP
#define ETH_P_IP 0x0800 /* IPv4 EtherType */
#endif

/*
 * XDP filter that drops IPv4 ICMP packets and passes everything else.
 *
 * Every header is bounds-checked against data_end before it is read;
 * the verifier rejects packet accesses that are not proven in range.
 * Packets too short to hold the header being inspected are passed.
 *
 * Returns XDP_DROP for IPv4 packets whose protocol is ICMP, XDP_PASS
 * otherwise.
 */
SEC("xdp")
int xdp_filter(struct xdp_md *ctx)
{
    void *data_end = (void *)(long)ctx->data_end;
    void *data = (void *)(long)ctx->data;

    struct ethhdr *eth = data;
    if ((void *)(eth + 1) > data_end)
        return XDP_PASS;

    /* Swap the constant (folded at compile time), not the packet field. */
    if (eth->h_proto != bpf_htons(ETH_P_IP))
        return XDP_PASS;

    struct iphdr *ip = (void *)(eth + 1);
    if ((void *)(ip + 1) > data_end)
        return XDP_PASS;

    if (ip->protocol == IPPROTO_ICMP)
        return XDP_DROP;

    return XDP_PASS;
}
char LICENSE[] SEC("license") = "GPL";
bash
undefinedAttach XDP program to interface
Attach XDP program to interface
ip link set dev eth0 xdp obj xdp_drop_icmp.bpf.o sec xdp
ip link set dev eth0 xdp obj xdp_drop_icmp.bpf.o sec xdp
Remove
Remove
ip link set dev eth0 xdp off
ip link set dev eth0 xdp off
Use native (driver) mode for best performance
Use native (driver) mode for best performance
ip link set dev eth0 xdp obj prog.bpf.o sec xdp mode native
XDP return codes: `XDP_PASS`, `XDP_DROP`, `XDP_TX` (hairpin), `XDP_REDIRECT`.
ip link set dev eth0 xdp obj prog.bpf.o sec xdp mode native
XDP返回码:`XDP_PASS`、`XDP_DROP`、`XDP_TX`(回环)、`XDP_REDIRECT`。
7. CO-RE — compile once, run everywhere
7. CO-RE — 一次编译,随处运行
CO-RE (Compile Once - Run Everywhere) uses BTF type info to relocate field accesses at load time.
c
// Use BTF-based field access (CO-RE aware)
#include <vmlinux.h>           // generated from running kernel's BTF
#include <bpf/bpf_helpers.h>   // SEC(), bpf_printk()
#include <bpf/bpf_tracing.h>   // PT_REGS_PARM1()
#include <bpf/bpf_core_read.h> // BPF_CORE_READ()
#include <bpf/bpf_endian.h>    // bpf_ntohs()

/*
 * kprobe on tcp_connect(): print the destination port of each outgoing
 * TCP connection attempt to the trace pipe.
 *
 * The first argument of the probed function is the socket. PT_REGS_PARM1
 * is architecture-specific, so the file must be compiled with
 * -D__TARGET_ARCH_<arch>.
 *
 * Always returns 0.
 */
SEC("kprobe/tcp_connect")
int trace_connect(struct pt_regs *ctx)
{
    struct sock *sk = (struct sock *)PT_REGS_PARM1(ctx);

    // BPF_CORE_READ relocates the field offset at load time
    u16 dport = BPF_CORE_READ(sk, __sk_common.skc_dport);

    // skc_dport is stored in network byte order
    bpf_printk("connect to port %d\n", bpf_ntohs(dport));
    return 0;
}

// bpf_printk() uses a GPL-only helper; loading fails without a GPL license.
char LICENSE[] SEC("license") = "GPL";
undefinedCO-RE(一次编译,随处运行)利用BTF类型信息在加载时重定位字段访问偏移。
c
// Use BTF-based field access (CO-RE aware)
#include <vmlinux.h>           // generated from running kernel's BTF
#include <bpf/bpf_helpers.h>   // SEC(), bpf_printk()
#include <bpf/bpf_tracing.h>   // PT_REGS_PARM1()
#include <bpf/bpf_core_read.h> // BPF_CORE_READ()
#include <bpf/bpf_endian.h>    // bpf_ntohs()

/*
 * kprobe on tcp_connect(): print the destination port of each outgoing
 * TCP connection attempt to the trace pipe.
 *
 * The first argument of the probed function is the socket. PT_REGS_PARM1
 * is architecture-specific, so the file must be compiled with
 * -D__TARGET_ARCH_<arch>.
 *
 * Always returns 0.
 */
SEC("kprobe/tcp_connect")
int trace_connect(struct pt_regs *ctx)
{
    struct sock *sk = (struct sock *)PT_REGS_PARM1(ctx);

    // BPF_CORE_READ relocates the field offset at load time
    u16 dport = BPF_CORE_READ(sk, __sk_common.skc_dport);

    // skc_dport is stored in network byte order
    bpf_printk("connect to port %d\n", bpf_ntohs(dport));
    return 0;
}

// bpf_printk() uses a GPL-only helper; loading fails without a GPL license.
char LICENSE[] SEC("license") = "GPL";
undefinedGenerate vmlinux.h from running kernel
Generate vmlinux.h from running kernel
bpftool btf dump file /sys/kernel/btf/vmlinux format c > vmlinux.h
bpftool btf dump file /sys/kernel/btf/vmlinux format c > vmlinux.h
Verify BTF is enabled
Verify BTF is enabled
ls /sys/kernel/btf/vmlinux
For the full map types reference, see [references/ebpf-map-types.md](references/ebpf-map-types.md).
ls /sys/kernel/btf/vmlinux
完整的映射表类型参考,请查看[references/ebpf-map-types.md](references/ebpf-map-types.md)。
Related skills
相关技能
- Use `skills/observability/ebpf-rust` for Aya framework Rust eBPF programs
- Use `skills/profilers/linux-perf` for perf-based tracing without eBPF
- Use `skills/runtimes/binary-hardening` for seccomp-bpf syscall filtering
- Use `skills/low-level-programming/linux-kernel-modules` for kernel module development
- 若使用Aya框架开发Rust版eBPF程序,请使用 `skills/observability/ebpf-rust`
- 若无需eBPF,基于perf的追踪请使用 `skills/profilers/linux-perf`
- 若进行seccomp-bpf系统调用过滤,请使用 `skills/runtimes/binary-hardening`
- 若开发内核模块,请使用 `skills/low-level-programming/linux-kernel-modules`