ip rule action
添加pbr 相关逻辑:https://www.cnblogs.com/codestack/p/15964315.html
fib_rules_lookup
,判断下action
之后执行ops->action
的操作,此刻应该是fib4_rule_action
,
在没有使用l3mdev
的情况下,使用rule->table
作为table id
,然后调用fib_get_table
获取该表,最后通过fib_table_lookup
进行路由项查找
-
FR_ACT_TO_TBL
:- 对应
ip rule
命令中常见的行为,它表示将数据包导向某个特定的路由表。 - 通常是用
lookup
或table
指定路由表,例如:ip rule add from 192.168.1.0/24 table 100
rule->action
会是FR_ACT_TO_TBL
,表示匹配的包应该查找指定的路由表。
- 对应
-
FR_ACT_UNREACHABLE
:- 对应
ip rule
中的unreachable
动作。 - 当规则匹配时,内核返回
-ENETUNREACH
,表示目的地不可达。 - 示例:
ip rule add from 192.168.1.0/24 unreachable
当数据包的源地址属于192.168.1.0/24 时
,内核会将该包丢弃,并返回“网络不可达”(ENETUNREACH
)错误。
- 对应
-
FR_ACT_PROHIBIT
:- 对应
ip rule
中的prohibit
动作。 - 当规则匹配时,内核返回
-EACCES
,表示访问被禁止。 - 示例:
ip rule add from 192.168.1.0/24 prohibit
当数据包的源地址属于192.168.1.0/24
时,包将被丢弃,并返回“权限被禁止”(EACCES
)错误。
- 对应
-
FR_ACT_BLACKHOLE
:- 对应
ip rule
中的blackhole
动作。 - 当规则匹配时,内核会简单地丢弃该包,但不会返回错误代码给发送方(注意:在下文的fib4_rule_action 中,FR_ACT_BLACKHOLE 与default 共用同一个分支,查找结果为-EINVAL)。
- 示例:
ip rule add from 192.168.1.0/24 blackhole
当数据包的源地址属于192.168.1.0/24
时,包会被悄悄丢弃,发送方不会收到任何通知。
- 对应
-
default
:- 如果
rule->action
不匹配任何已知的动作,则default
情况返回-EINVAL
,表示无效的参数或操作。 - 这个情况通常不会直接映射到
ip rule
,而是表示遇到了未定义或无效的动作
- 如果
FR_ACT_GOTO
- 在 Linux 内核中,
FR_ACT_GOTO
的实现会修改规则的流控制。当当前规则命中并且操作为FR_ACT_GOTO
时,内核并不会立即对该规则采取实际的路由决策, - 而是转到另一个指定的规则继续匹配。这可以允许用户定义一系列相互关联的规则。
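使用 ip rule 时可以通过 goto 关键字实现这个行为。注意 goto 后面的数字是目标规则的优先级(priority),而不是路由表编号,并且目标规则的优先级数值必须大于当前规则自身的优先级数值(不支持向后跳转),例如: ip rule add from 192.168.1.0/24 table 100 priority 100 ip rule add from 10.0.0.0/8 priority 50 goto 100
这意味着源自
10.0.0.0/8
的数据包将跳转到优先级为100 的规则
继续匹配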
FR_ACT_TO_TBL
对应ip rule
中指定查找路由表的行为(lookup
或table
)。FR_ACT_UNREACHABLE
对应ip rule add ... unreachable
。FR_ACT_PROHIBIT
对应ip rule add ... prohibit
。FR_ACT_BLACKHOLE
对应ip rule add ... blackhole
。default
是处理未知或无效动作的情况,返回-EINVAL
错误。
/*
 * fib_rules_lookup - walk the policy-routing rules and run the first usable one.
 *
 * @ops:   rule operations of the address family; supplies the rule list and
 *         the ->action / ->suppress callbacks.
 * @fl:    flow being looked up.
 * @flags: passed through to fib_rule_match() and ->action.
 * @arg:   lookup argument; on success arg->rule is set to the rule that
 *         produced the result.
 *
 * Returns the value produced by ->action for the first matching rule whose
 * result is not -EAGAIN, or -ESRCH when the list is exhausted (or when a
 * reference on the selected rule could not be taken).
 *
 * The whole walk runs inside an RCU read-side critical section.
 */
int fib_rules_lookup(struct fib_rules_ops *ops, struct flowi *fl,
		     int flags, struct fib_lookup_arg *arg)
{
	struct fib_rule *rule;
	int err;

	rcu_read_lock();

	list_for_each_entry_rcu(rule, &ops->rules_list, list) {
jumped:
		/* The flow does not match this rule: try the next one. */
		if (!fib_rule_match(rule, ops, fl, flags, arg))
			continue;

		/* The rule matched; what happens next depends on its action. */
		if (rule->action == FR_ACT_GOTO) {
			/* Jump to another rule and continue matching from there. */
			struct fib_rule *target;

			target = rcu_dereference(rule->ctarget);
			if (target == NULL) {
				/*
				 * A goto rule may be configured before its
				 * target exists, so an unresolved target simply
				 * skips this rule.
				 */
				continue;
			} else {
				/* Jump, then re-run the match on the target. */
				rule = target;
				goto jumped;
			}
		} else if (rule->action == FR_ACT_NOP)
			/* "Do nothing" rule: move on to the next rule. */
			continue;
		else
			err = INDIRECT_CALL_MT(ops->action,
					       fib6_rule_action,
					       fib4_rule_action,
					       rule, fl, flags, arg);

		/* A successful result may still be vetoed by ->suppress. */
		if (!err && ops->suppress && INDIRECT_CALL_MT(ops->suppress,
							      fib6_rule_suppress,
							      fib4_rule_suppress,
							      rule, arg))
			continue;

		/* -EAGAIN means "no result from this rule, keep walking". */
		if (err != -EAGAIN) {
			if ((arg->flags & FIB_LOOKUP_NOREF) ||
			    likely(refcount_inc_not_zero(&rule->refcnt))) {
				arg->rule = rule;
				goto out;
			}
			/* Could not take a reference: fall through to -ESRCH. */
			break;
		}
	}

	err = -ESRCH;
out:
	rcu_read_unlock();

	return err;
}
在没有使用l3mdev
的情况下,使用rule->table
作为table id
,然后调用fib_get_table
获取该表,最后通过fib_table_lookup
进行路由项查找。
/*
 * fib4_rule_action - IPv4 ->action callback invoked for a matching rule.
 *
 * Only FR_ACT_TO_TBL proceeds to a routing-table lookup. The other actions
 * return immediately: FR_ACT_UNREACHABLE -> -ENETUNREACH,
 * FR_ACT_PROHIBIT -> -EACCES, and FR_ACT_BLACKHOLE shares the default branch
 * and returns -EINVAL.
 *
 * Returns the result of fib_table_lookup() on the selected table, or -EAGAIN
 * when that table does not exist.
 */
int fib4_rule_action(struct fib_rule *rule, struct flowi *flp,
		     int flags, struct fib_lookup_arg *arg)
{
	int err = -EAGAIN; /* kept if the table cannot be found */
	struct fib_table *tbl;
	u32 tb_id;

	switch (rule->action) {
	case FR_ACT_TO_TBL:
		break;

	case FR_ACT_UNREACHABLE:
		return -ENETUNREACH;

	case FR_ACT_PROHIBIT:
		return -EACCES;

	case FR_ACT_BLACKHOLE:
	default:
		return -EINVAL;
	}

	rcu_read_lock();

	/* Resolve the table id for this rule, then fetch the table itself. */
	tb_id = fib_rule_get_table(rule, arg);
	tbl = fib_get_table(rule->fr_net, tb_id);
	if (tbl)
		err = fib_table_lookup(tbl, &flp->u.ip4,
				       (struct fib_result *)arg->result,
				       arg->flags);

	rcu_read_unlock();
	return err;
}
fib_table_lookup
这个函数太长,不容易完全看懂,这里只关注它最后返回的err
叶子节点上的几个判断条件里,最关键的是最后一个,也就是关于scope
的比较(路由的fib_scope 小于flp->flowi4_scope 时,该路由会被跳过)。 flp->flowi4_scope
初始化是RT_SCOPE_UNIVERSE=0
,然后在ip_route_output_key_hash
中设置fl4->flowi4_scope = ((tos & RTO_ONLINK) ?RT_SCOPE_LINK : RT_SCOPE_UNIVERSE);
也还是RT_SCOPE_UNIVERSE
(因为IP_TOS default=0),这个条件就是任何路由都会满足。
如果设置了
MSG_DONTROUTE
,则tos 会被置上RTO_ONLINK 标志
,从而导致scope = RT_SCOPE_LINK
往下走是叶子节点的链表遍历。 fa
是fib_alias
对应的是一条路由,多个fib_alias
可以共享一个相同的fib_info
,这是真实路由信息,比如设备,下一跳什么的,而其中的fib_info->fib_nh[nhsel]
代表了下一跳地址。这儿的nhsel
一般是0(nhsel 是从0 开始的下标)
,除非是多路径支持,不然一条路由一般只有一个下一跳
/* rtm_scope

   Really it is not scope, but sort of distance to the destination.
   NOWHERE is reserved for non-existent destinations, HOST is our
   local addresses, LINK is destinations located on a directly attached
   link, and UNIVERSE is everywhere in the Universe.

   Intermediate values are also possible, e.g. interior routes
   could be assigned a value between UNIVERSE and LINK.
*/

enum rt_scope_t {
	RT_SCOPE_UNIVERSE=0,	/* everywhere */
/* User defined values  */
	RT_SCOPE_SITE=200,
	RT_SCOPE_LINK=253,	/* destinations on a directly attached link */
	RT_SCOPE_HOST=254,	/* our local addresses */
	RT_SCOPE_NOWHERE=255	/* reserved for non-existent destinations */
};
看
/* this line carries forward the xor from earlier in the function */ index = key ^ n->key; /* Step 3: Process the leaf, if that fails fall back to backtracing */ hlist_for_each_entry_rcu(fa, &n->leaf, fa_list) { struct fib_info *fi = fa->fa_info; struct fib_nh_common *nhc; int nhsel, err; if ((BITS_PER_LONG > KEYLENGTH) || (fa->fa_slen < KEYLENGTH)) { if (index >= (1ul << fa->fa_slen)) continue; } if (fa->fa_tos && fa->fa_tos != flp->flowi4_tos) continue; if (fi->fib_dead) continue; if (fa->fa_info->fib_scope < flp->flowi4_scope) continue; fib_alias_accessed(fa); err = fib_props[fa->fa_type].error; if (unlikely(err < 0)) { out_reject: #ifdef CONFIG_IP_FIB_TRIE_STATS this_cpu_inc(stats->semantic_match_passed); #endif trace_fib_table_lookup(tb->tb_id, flp, NULL, err); return err; } if (fi->fib_flags & RTNH_F_DEAD) continue; if (unlikely(fi->nh)) { if (nexthop_is_blackhole(fi->nh)) { err = fib_props[RTN_BLACKHOLE].error; goto out_reject; } nhc = nexthop_get_nhc_lookup(fi->nh, fib_flags, flp, &nhsel); if (nhc) goto set_result; goto miss; } for (nhsel = 0; nhsel < fib_info_num_path(fi); nhsel++) { nhc = fib_info_nhc(fi, nhsel); if (!fib_lookup_good_nhc(nhc, fib_flags, flp)) continue; set_result: if (!(fib_flags & FIB_LOOKUP_NOREF)) refcount_inc(&fi->fib_clntref); res->prefix = htonl(n->key); res->prefixlen = KEYLENGTH - fa->fa_slen; res->nh_sel = nhsel; res->nhc = nhc; res->type = fa->fa_type; res->scope = fi->fib_scope; res->fi = fi; res->table = tb; res->fa_head = &n->leaf; #ifdef CONFIG_IP_FIB_TRIE_STATS this_cpu_inc(stats->semantic_match_passed); #endif trace_fib_table_lookup(tb->tb_id, flp, nhc, err); return err; }
需要注意的是返回的err有哪些取值?
err = fib_props[fa->fa_type].error;
/*
 * Per-route-type properties, indexed by the RTN_* route type.
 *
 * .error is the value fib_table_lookup() picks up via
 * fib_props[fa->fa_type].error; a negative value makes the lookup return
 * that error instead of a route.
 * .scope holds an RT_SCOPE_* value associated with the route type.
 */
const struct fib_prop fib_props[RTN_MAX + 1] = {
	[RTN_UNSPEC] = {
		.error	= 0,
		.scope	= RT_SCOPE_NOWHERE,
	},
	[RTN_UNICAST] = {
		.error	= 0,
		.scope	= RT_SCOPE_UNIVERSE,
	},
	[RTN_LOCAL] = {
		.error	= 0,
		.scope	= RT_SCOPE_HOST,
	},
	[RTN_BROADCAST] = {
		.error	= 0,
		.scope	= RT_SCOPE_LINK,
	},
	[RTN_ANYCAST] = {
		.error	= 0,
		.scope	= RT_SCOPE_LINK,
	},
	[RTN_MULTICAST] = {
		.error	= 0,
		.scope	= RT_SCOPE_UNIVERSE,
	},
	/* The route types below carry a negative .error. */
	[RTN_BLACKHOLE] = {
		.error	= -EINVAL,
		.scope	= RT_SCOPE_UNIVERSE,
	},
	[RTN_UNREACHABLE] = {
		.error	= -EHOSTUNREACH,
		.scope	= RT_SCOPE_UNIVERSE,
	},
	[RTN_PROHIBIT] = {
		.error	= -EACCES,
		.scope	= RT_SCOPE_UNIVERSE,
	},
	[RTN_THROW] = {
		.error	= -EAGAIN,
		.scope	= RT_SCOPE_UNIVERSE,
	},
	[RTN_NAT] = {
		.error	= -EINVAL,
		.scope	= RT_SCOPE_NOWHERE,
	},
	[RTN_XRESOLVE] = {
		.error	= -EINVAL,
		.scope	= RT_SCOPE_NOWHERE,
	},
};
对于访问lo接口流量:
local 127.0.0.0/8 dev lo proto kernel scope host src 127.0.0.1 local 127.0.0.1 dev lo proto kernel scope host src 127.0.0.1
一般命中上述路由;最后返回的err = 0
,又因为res->type = RTN_LOCAL
,且fl4->saddr = 0
,
所以fl4->saddr = 127.0.0.1
且fl4->flowi4_oif = loopback_dev
,
if (res->type == RTN_LOCAL) { if (!fl4->saddr) { if (res->fi->fib_prefsrc) fl4->saddr = res->fi->fib_prefsrc; else fl4->saddr = fl4->daddr; }
接着创建路由缓存条目__mkroute_output
。
直接看到rt_set_nexthop(rth, fl4->daddr, res, fnhe, fi, type, 0, do_cache);
,