Linux eBPF 错误:invalid bpf_context access

1. 前言

限于作者能力水平,本文可能存在谬误,因此而给读者带来的损失,作者不做任何承诺。

2. 案例

笔者想通过 eBPF 程序抓取网络 skb 的 ip、port 信息,于是基于 Linux 4.14.x 内核自带的 sample 程序 sockex1_kern.c 和 sockex1_user.c,做了点简单修改。修改后的 sockex1_kern.c 如下:

#include <linux/bpf.h>
#include <linux/if_ether.h>
#include <linux/if_packet.h>
#include <linux/ip.h>
#include "bpf_helpers.h"

#include <stddef.h>

typedef unsigned int u32;

/*
 * Array map with 256 slots keyed by a u32 index. Each slot holds four
 * long long values (the verifier log reports ks=4, vs=32), which
 * bpf_prog1 fills in as value[0..3].
 */
struct bpf_map_def SEC("maps") my_map = {
	.type = BPF_MAP_TYPE_ARRAY,
	.key_size = sizeof(u32),
	.value_size = sizeof(long long) * 4,
	.max_entries = 256,
};

/*
 * Socket program: for each outgoing packet, look up the my_map slot keyed
 * by the byte read at the IP header's protocol offset and store the skb's
 * remote/local IPv4 address and port in it. Always returns 0.
 *
 * NOTE: this is the version that fails to load. The verifier stops at the
 * read of skb->remote_ip4 with
 * "invalid bpf_context access off=92 size=4".
 */
SEC("socket1")
int bpf_prog1(struct __sk_buff *skb)
{
	/*
	 * Byte at ETH_HLEN + offsetof(struct iphdr, protocol); this assumes an
	 * Ethernet header immediately followed by an IPv4 header. Used as map key.
	 */
	int index = load_byte(skb, ETH_HLEN + offsetof(struct iphdr, protocol));
	long long *value;

	/* Anything that is not an outgoing packet is ignored. */
	if (skb->pkt_type != PACKET_OUTGOING)
		return 0;

	value = bpf_map_lookup_elem(&my_map, &index);
	if (value) {
		/* Context loads at offsets 92, 132, 96 and 136 in the disassembly. */
		value[0] = skb->remote_ip4;	/* first rejected access (off=92) */
		value[1] = skb->remote_port;
		value[2] = skb->local_ip4;
		value[3] = skb->local_port;
	}

	return 0;
}

char _license[] SEC("license") = "GPL";

修改后的 sockex1_user.c 如下:

// SPDX-License-Identifier: GPL-2.0
#include <stdio.h>
#include <assert.h>
#include <linux/bpf.h>
#include "libbpf.h"
#include "bpf_load.h"
#include "sock_example.h"
#include <unistd.h>
#include <arpa/inet.h>
#include <linux/net_tstamp.h>

/*
 * Copy the map entry stored under @proto in map_fd[0] into @out.
 * Returns 0 on success, -1 (after printing a diagnostic) on failure.
 */
static int read_proto_entry(int proto, long long *out)
{
	if (bpf_map_lookup_elem(map_fd[0], &proto, out) != 0) {
		perror("bpf_map_lookup_elem");
		return -1;
	}
	return 0;
}

/*
 * Load "<bpf-prog-name>_kern.o", attach prog_fd[0] to a raw socket opened on
 * "lo", start a background ping to generate traffic, then print the TCP, UDP
 * and ICMP map entries once per second, five times.
 *
 * Returns 0 on success (and, as before, after printing the usage line);
 * returns 1 if loading, socket setup, attaching or a map lookup fails.
 *
 * The calls with side effects are deliberately NOT wrapped in assert():
 * with -DNDEBUG an assert() expression is not evaluated at all, which would
 * silently skip the attach and leave the printed arrays uninitialised.
 */
int main(int argc, char **argv)
{
	char filename[256];
	FILE *ping;
	int i, sock;
	int rc = 1;

	if (argc != 2) {
		printf("usage: %s <bpf-prog-name>\n", argv[0]);
		return 0;
	}

	snprintf(filename, sizeof(filename), "%s_kern.o", argv[1]);

	if (load_bpf_file(filename)) {
		printf("%s(): %s", __func__, bpf_log_buf);
		return 1;
	}

	/* NOTE(review): presumably open_raw_sock() returns < 0 on failure -- confirm in sock_example.h. */
	sock = open_raw_sock("lo");
	if (sock < 0) {
		fprintf(stderr, "open_raw_sock(\"lo\") failed\n");
		return 1;
	}

	if (setsockopt(sock, SOL_SOCKET, SO_ATTACH_BPF, prog_fd,
		       sizeof(prog_fd[0])) != 0) {
		perror("setsockopt(SO_ATTACH_BPF)");
		goto out_close;
	}

	/*
	 * Traffic generator. Best effort, as in the original: a failure is
	 * reported but does not abort, and the stream is left open for the
	 * lifetime of the process.
	 */
	ping = popen("ping -c5 localhost", "r");
	if (!ping)
		perror("popen");

	for (i = 0; i < 5; i++) {
		/* Each entry: [0]=remote ip4, [1]=remote port, [2]=local ip4, [3]=local port. */
		long long tcp_info[4], udp_info[4], icmp_info[4];

		if (read_proto_entry(IPPROTO_TCP, tcp_info) ||
		    read_proto_entry(IPPROTO_UDP, udp_info) ||
		    read_proto_entry(IPPROTO_ICMP, icmp_info))
			goto out_close;

		printf("[%d] TCP %08llx:%lld=>%08llx:%lld UDP %08llx:%lld=>%08llx:%lld ICMP %08llx:%lld=>%08llx:%lld\n",
		       i + 1,
		       tcp_info[0], tcp_info[1], tcp_info[2], tcp_info[3],
		       udp_info[0], udp_info[1], udp_info[2], udp_info[3],
		       icmp_info[0], icmp_info[1], icmp_info[2], icmp_info[3]);
		sleep(1);
	}

	rc = 0;

out_close:
	close(sock);
	return rc;
}

分别编译 sockex1_kern.c 和 sockex1_user.c 后,运行:

# ./sockex1_user sockex1
bpf_load_program() err=13
0: (bf) r6 = r1
1: (30) r0 = *(u8 *)skb[23]
2: (63) *(u32 *)(r10 -4) = r0
3: (61) r1 = *(u32 *)(r6 +4)
4: (55) if r1 != 0x4 goto pc+14
 R0=inv(id=0) R1=inv4 R6=ctx(id=0,off=0,imm=0) R10=fp0
5: (bf) r2 = r10
6: (07) r2 += -4
7: (18) r1 = 0x9ea5c000
9: (85) call bpf_map_lookup_elem#1
10: (15) if r0 == 0x0 goto pc+8
 R0=map_value(id=0,off=0,ks=4,vs=32,imm=0) R6=ctx(id=0,off=0,imm=0) R10=fp0
11: (61) r1 = *(u32 *)(r6 +92)
invalid bpf_context access off=92 size=4

可以看到,加载程序时报出了 invalid bpf_context access off=92 size=4 错误信息,内核返回的错误码为 -13 (-EACCES)。反汇编 sockex1_kern.o 如下:

$ llvm-objdump-8 -d sockex1_kern.o

sockex1_kern.o:	file format ELF64-BPF

Disassembly of section socket1:
0000000000000000 bpf_prog1:
       0:	bf 16 00 00 00 00 00 00 	r6 = r1
       1:	30 00 00 00 17 00 00 00 	r0 = *(u8 *)skb[23]
       2:	63 0a fc ff 00 00 00 00 	*(u32 *)(r10 - 4) = r0
       3:	61 61 04 00 00 00 00 00 	r1 = *(u32 *)(r6 + 4)
       4:	55 01 0e 00 04 00 00 00 	if r1 != 4 goto +14 <LBB0_3>
       5:	bf a2 00 00 00 00 00 00 	r2 = r10
       6:	07 02 00 00 fc ff ff ff 	r2 += -4
       7:	18 01 00 00 00 00 00 00 00 00 00 00 00 00 00 00 	r1 = 0 ll
       9:	85 00 00 00 01 00 00 00 	call 1
      10:	15 00 08 00 00 00 00 00 	if r0 == 0 goto +8 <LBB0_3>
      11:	61 61 5c 00 00 00 00 00 	r1 = *(u32 *)(r6 + 92) // (1) value[0] = skb->remote_ip4;
      12:	7b 10 00 00 00 00 00 00 	*(u64 *)(r0 + 0) = r1
      13:	61 61 84 00 00 00 00 00 	r1 = *(u32 *)(r6 + 132)
      14:	7b 10 08 00 00 00 00 00 	*(u64 *)(r0 + 8) = r1
      15:	61 61 60 00 00 00 00 00 	r1 = *(u32 *)(r6 + 96)
      16:	7b 10 10 00 00 00 00 00 	*(u64 *)(r0 + 16) = r1
      17:	61 61 88 00 00 00 00 00 	r1 = *(u32 *)(r6 + 136)
      18:	7b 10 18 00 00 00 00 00 	*(u64 *)(r0 + 24) = r1

0000000000000098 LBB0_3:
      19:	b7 00 00 00 00 00 00 00 	r0 = 0
      20:	95 00 00 00 00 00 00 00 	exit

结合 struct __sk_buff 的定义:

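/* include/uapi/linux/bpf.h (Linux 4.14),右侧注释为各成员的字节偏移 */
struct __sk_buff {
	__u32 len;		/* 0 */
	__u32 pkt_type;		/* 4 */
	__u32 mark;		/* 8 */
	__u32 queue_mapping;	/* 12 */
	__u32 protocol;		/* 16 */
	__u32 vlan_present;	/* 20 */
	__u32 vlan_tci;		/* 24 */
	__u32 vlan_proto;	/* 28 */
	__u32 priority;		/* 32 */
	__u32 ingress_ifindex;	/* 36 */
	__u32 ifindex;		/* 40 */
	__u32 tc_index;		/* 44 */
	__u32 cb[5];		/* 48 */
	__u32 hash;		/* 68 */
	__u32 tc_classid;	/* 72 */
	__u32 data;		/* 76 */
	__u32 data_end;		/* 80 */
	__u32 napi_id;		/* 84 */

	/* Accessed by BPF_PROG_TYPE_sk_skb types from here to ... */
	__u32 family;		/* 88 */
	__u32 remote_ip4;	/* 92,  Stored in network byte order */
	__u32 local_ip4;	/* 96,  Stored in network byte order */
	__u32 remote_ip6[4];	/* 100, Stored in network byte order */
	__u32 local_ip6[4];	/* 116, Stored in network byte order */
	__u32 remote_port;	/* 132, Stored in network byte order */
	__u32 local_port;	/* 136, stored in host byte order */
	/* ... here. */
};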

可知,invalid bpf_context access off=92 size=4 错误信息对应反汇编代码注释 (1) 处,对应代码 sockex1_kern.c 的语句 value[0] = skb->remote_ip4;,但为什么会返回错误码 -13 (-EACCES)?最终定位跟踪如下内核代码路径:

sys_bpf()
	bpf_prog_load()
		bpf_check()
			do_check()
				check_mem_access()
					check_ctx_access()
						env->prog->aux->ops->is_valid_access(off, size, t, &info)
							sk_filter_is_valid_access()
							
/*
 * Context-access check reached through ops->is_valid_access() for this
 * program. Only the leading switch is shown; the rest of the function is
 * elided ("...") in this excerpt.
 */
static bool sk_filter_is_valid_access(int off, int size,
				      enum bpf_access_type type,
				      struct bpf_insn_access_aux *info)
{
	switch (off) {
	case bpf_ctx_range(struct __sk_buff, tc_classid):
	case bpf_ctx_range(struct __sk_buff, data):
	case bpf_ctx_range(struct __sk_buff, data_end):
	/*
	 * Every struct __sk_buff member from family through local_port is
	 * rejected here. The rejection happens before `type` is looked at, so
	 * it covers reads as well as writes. (Presumably a security
	 * restriction for this program type.)
	 */
	case bpf_ctx_range_till(struct __sk_buff, family, local_port):
		return false;
	}

	...
}

在本文的测试例子中,eBPF 字节码程序 sockex1_kern.o 在加载到内核时,对 skb->remote_ip4 的读操作 (偏移 92) 没有通过 sk_filter_is_valid_access() 的检查,因此校验器拒绝加载该程序,并返回 -EACCES。

posted @ 2026-01-12 23:35  JiMoKuangXiangQu  阅读(6)  评论(0)    收藏  举报