ip rule action
添加pbr 相关逻辑:https://www.cnblogs.com/codestack/p/15964315.html
在fib_rules_lookup中,判断完规则的action之后,会执行ops->action指向的操作,对于IPv4此刻应该是fib4_rule_action。
在没有使用l3mdev的情况下,使用rule->table作为table id,然后调用fib_get_table获取该表,最后通过fib_table_lookup进行路由项查找
-
FR_ACT_TO_TBL:
- 对应ip rule命令中常见的行为,它表示将数据包导向某个特定的路由表。
- 通常是用lookup或table指定路由表,例如:ip rule add from 192.168.1.0/24 table 100。在这种情况下,rule->action会是FR_ACT_TO_TBL,表示匹配的包应该查找指定的路由表。
-
FR_ACT_UNREACHABLE:
- 对应ip rule中的unreachable动作。
- 当规则匹配时,内核返回-ENETUNREACH,表示目的地不可达。
- 示例:ip rule add from 192.168.1.0/24 unreachable。这意味着如果数据包的源地址来自192.168.1.0/24,内核会将该包丢弃,并返回“网络不可达”(ENETUNREACH)错误。
-
FR_ACT_PROHIBIT:
- 对应ip rule中的prohibit动作。
- 当规则匹配时,内核返回-EACCES,表示访问被禁止。
- 示例:ip rule add from 192.168.1.0/24 prohibit。这意味着当数据包的源地址匹配192.168.1.0/24时,包将被丢弃,并返回“权限被禁止”(EACCES)错误。
-
FR_ACT_BLACKHOLE:
- 对应ip rule中的blackhole动作。
- 当规则匹配时,内核会简单地丢弃该包,但不会返回错误代码给发送方。(在下文的fib4_rule_action代码中,FR_ACT_BLACKHOLE与default共用同一个分支,函数返回值为-EINVAL。)
- 示例:ip rule add from 192.168.1.0/24 blackhole。这意味着当数据包的源地址匹配192.168.1.0/24时,包会被悄悄丢弃,发送方不会收到任何通知。
-
default:
- 如果rule->action不匹配任何已知的动作,则default情况返回-EINVAL,表示无效的参数或操作。
- 这个情况通常不会直接映射到ip rule,而是表示遇到了未定义或无效的动作。
FR_ACT_GOTO:
- 在 Linux 内核中,FR_ACT_GOTO的实现会修改规则的流控制。当当前规则命中并且操作为FR_ACT_GOTO时,内核并不会立即对该规则采取实际的路由决策,而是转到另一个指定的规则继续匹配。这可以允许用户定义一系列相互关联的规则。
- 使用 ip rule 时可以通过 goto 关键字实现这个行为。注意goto后面的数字是目标规则的优先级(priority),而不是路由表编号,并且目标规则的优先级数值必须大于goto规则自身的优先级数值。例如:
  ip rule add from 192.168.1.0/24 table 100 priority 100
  ip rule add from 10.0.0.0/8 goto 100 priority 50
- 这意味着源自10.0.0.0/8的数据包将跳转到优先级(编号)为100的规则继续匹配。
FR_ACT_TO_TBL对应ip rule中指定查找路由表的行为(lookup或table)。FR_ACT_UNREACHABLE对应ip rule add ... unreachable。FR_ACT_PROHIBIT对应ip rule add ... prohibit。FR_ACT_BLACKHOLE对应ip rule add ... blackhole。default是处理未知或无效动作的情况,返回-EINVAL错误。
/*
 * Walk the policy-routing rules registered in @ops and apply the first one
 * that yields a result for the flow @fl.
 *
 * The rule list is traversed under rcu_read_lock().  For every rule that
 * matches the flow:
 *   - FR_ACT_GOTO rules redirect the walk to rule->ctarget (if it is set);
 *   - FR_ACT_NOP rules are skipped;
 *   - any other rule is handed to ops->action (fib6_rule_action or
 *     fib4_rule_action via INDIRECT_CALL_MT).
 *
 * A result of -EAGAIN from the action means "keep looking"; any other value
 * ends the walk.  Unless FIB_LOOKUP_NOREF is set in arg->flags, a reference
 * is taken on the selected rule before it is stored in arg->rule.
 *
 * Returns the value produced by the action handler of the selected rule, or
 * -ESRCH when no rule produced a result (including the case where the
 * reference on the selected rule could not be taken).
 */
int fib_rules_lookup(struct fib_rules_ops *ops, struct flowi *fl,
		     int flags, struct fib_lookup_arg *arg)
{
	struct fib_rule *rule;
	int err;

	rcu_read_lock();

	list_for_each_entry_rcu(rule, &ops->rules_list, list) {
jumped:
		/* The lookup key does not match this rule: try the next one. */
		if (!fib_rule_match(rule, ops, fl, flags, arg))
			continue;

		/* The rule matched; what happens next depends on its action. */
		if (rule->action == FR_ACT_GOTO) {
			/* Jump to another rule and resume matching from there. */
			struct fib_rule *target;

			target = rcu_dereference(rule->ctarget);
			if (target == NULL) {
				/*
				 * A goto rule may be configured before its
				 * target rule exists, so an unresolved target
				 * is simply skipped.
				 */
				continue;
			} else {
				rule = target;
				goto jumped;
			}
		} else if (rule->action == FR_ACT_NOP)
			/* "Do nothing" rule: move on to the next rule. */
			continue;
		else
			err = INDIRECT_CALL_MT(ops->action,
					       fib6_rule_action,
					       fib4_rule_action,
					       rule, fl, flags, arg);

		/* A successful result may still be suppressed by the rule. */
		if (!err && ops->suppress && INDIRECT_CALL_MT(ops->suppress,
							      fib6_rule_suppress,
							      fib4_rule_suppress,
							      rule, arg))
			continue;

		/* -EAGAIN means "no decision here, keep walking the list". */
		if (err != -EAGAIN) {
			if ((arg->flags & FIB_LOOKUP_NOREF) ||
			    likely(refcount_inc_not_zero(&rule->refcnt))) {
				arg->rule = rule;
				goto out;
			}
			/* Could not take a reference on the rule: give up. */
			break;
		}
	}

	err = -ESRCH;
out:
	rcu_read_unlock();

	return err;
}
在没有使用l3mdev的情况下,使用rule->table作为table id,然后调用fib_get_table获取该表,最后通过fib_table_lookup进行路由项查找。
int fib4_rule_action(struct fib_rule *rule, struct flowi *flp, int flags, struct fib_lookup_arg *arg) { int err = -EAGAIN; struct fib_table *tbl; u32 tb_id; switch (rule->action) { case FR_ACT_TO_TBL: break; case FR_ACT_UNREACHABLE: return -ENETUNREACH; case FR_ACT_PROHIBIT: return -EACCES; case FR_ACT_BLACKHOLE: default: return -EINVAL; } rcu_read_lock(); tb_id = fib_rule_get_table(rule, arg); tbl = fib_get_table(rule->fr_net, tb_id); if (tbl) err = fib_table_lookup(tbl, &flp->u.ip4, (struct fib_result *)arg->result, arg->flags); rcu_read_unlock(); return err; }
fib_table_lookup这个函数太长 看不懂!!!只知道最后的结果是返回的err
判断条件基本都是看最后一个,也就是关于scope的比较(fa->fa_info->fib_scope < flp->flowi4_scope时跳过该路由)。 flp->flowi4_scope初始化是RT_SCOPE_UNIVERSE=0,然后在ip_route_output_key_hash中设置fl4->flowi4_scope = ((tos & RTO_ONLINK) ?RT_SCOPE_LINK : RT_SCOPE_UNIVERSE);也还是RT_SCOPE_UNIVERSE(因为IP_TOS default=0),因此这个条件任何路由都会满足。
如果设置了
MSG_DONTROUTE,则TOS = RTO_ONLINK,从而导致scope = RT_SCOPE_LINK
往下走是叶子节点的链表遍历。 fa是fib_alias,对应的是一条路由;多个fib_alias可以共享一个相同的fib_info,fib_info才是真实的路由信息,比如设备、下一跳等,而其中的fib_info->fib_nh[nhsel]代表了下一跳地址。这儿的nhsel是下一跳的下标,从0开始,一般是0(即只有1个下一跳),除非是多路径支持,不然一条路由一般只有一个下一跳
/*
 * rtm_scope
 *
 * Despite the name this is less a "scope" than a rough distance to the
 * destination:
 *   NOWHERE  - reserved for destinations that do not exist
 *   HOST     - our own local addresses
 *   LINK     - destinations on a directly attached link
 *   UNIVERSE - anywhere in the universe
 *
 * Values in between may be used as well; for example interior routes
 * could be given a value between UNIVERSE and LINK.
 */
enum rt_scope_t {
	RT_SCOPE_UNIVERSE = 0,
	/* User defined values */
	RT_SCOPE_SITE = 200,
	RT_SCOPE_LINK = 253,
	RT_SCOPE_HOST = 254,
	RT_SCOPE_NOWHERE = 255
};
看
/* this line carries forward the xor from earlier in the function */ index = key ^ n->key; /* Step 3: Process the leaf, if that fails fall back to backtracing */ hlist_for_each_entry_rcu(fa, &n->leaf, fa_list) { struct fib_info *fi = fa->fa_info; struct fib_nh_common *nhc; int nhsel, err; if ((BITS_PER_LONG > KEYLENGTH) || (fa->fa_slen < KEYLENGTH)) { if (index >= (1ul << fa->fa_slen)) continue; } if (fa->fa_tos && fa->fa_tos != flp->flowi4_tos) continue; if (fi->fib_dead) continue; if (fa->fa_info->fib_scope < flp->flowi4_scope) continue; fib_alias_accessed(fa); err = fib_props[fa->fa_type].error; if (unlikely(err < 0)) { out_reject: #ifdef CONFIG_IP_FIB_TRIE_STATS this_cpu_inc(stats->semantic_match_passed); #endif trace_fib_table_lookup(tb->tb_id, flp, NULL, err); return err; } if (fi->fib_flags & RTNH_F_DEAD) continue; if (unlikely(fi->nh)) { if (nexthop_is_blackhole(fi->nh)) { err = fib_props[RTN_BLACKHOLE].error; goto out_reject; } nhc = nexthop_get_nhc_lookup(fi->nh, fib_flags, flp, &nhsel); if (nhc) goto set_result; goto miss; } for (nhsel = 0; nhsel < fib_info_num_path(fi); nhsel++) { nhc = fib_info_nhc(fi, nhsel); if (!fib_lookup_good_nhc(nhc, fib_flags, flp)) continue; set_result: if (!(fib_flags & FIB_LOOKUP_NOREF)) refcount_inc(&fi->fib_clntref); res->prefix = htonl(n->key); res->prefixlen = KEYLENGTH - fa->fa_slen; res->nh_sel = nhsel; res->nhc = nhc; res->type = fa->fa_type; res->scope = fi->fib_scope; res->fi = fi; res->table = tb; res->fa_head = &n->leaf; #ifdef CONFIG_IP_FIB_TRIE_STATS this_cpu_inc(stats->semantic_match_passed); #endif trace_fib_table_lookup(tb->tb_id, flp, nhc, err); return err; }
需要注意的是,返回的err有哪些取值?
err = fib_props[fa->fa_type].error;
const struct fib_prop fib_props[RTN_MAX + 1] = { [RTN_UNSPEC] = { .error = 0, .scope = RT_SCOPE_NOWHERE, }, [RTN_UNICAST] = { .error = 0, .scope = RT_SCOPE_UNIVERSE, }, [RTN_LOCAL] = { .error = 0, .scope = RT_SCOPE_HOST, }, [RTN_BROADCAST] = { .error = 0, .scope = RT_SCOPE_LINK, }, [RTN_ANYCAST] = { .error = 0, .scope = RT_SCOPE_LINK, }, [RTN_MULTICAST] = { .error = 0, .scope = RT_SCOPE_UNIVERSE, }, [RTN_BLACKHOLE] = { .error = -EINVAL, .scope = RT_SCOPE_UNIVERSE, }, [RTN_UNREACHABLE] = { .error = -EHOSTUNREACH, .scope = RT_SCOPE_UNIVERSE, }, [RTN_PROHIBIT] = { .error = -EACCES, .scope = RT_SCOPE_UNIVERSE, }, [RTN_THROW] = { .error = -EAGAIN, .scope = RT_SCOPE_UNIVERSE, }, [RTN_NAT] = { .error = -EINVAL, .scope = RT_SCOPE_NOWHERE, }, [RTN_XRESOLVE] = { .error = -EINVAL, .scope = RT_SCOPE_NOWHERE, }, };
对于访问lo接口流量:
local 127.0.0.0/8 dev lo proto kernel scope host src 127.0.0.1 local 127.0.0.1 dev lo proto kernel scope host src 127.0.0.1
一般命中上述路由;最后返回的err = 0,又因为res->type = RTN_LOCAL,且fl4->saddr = 0,
所以fl4->saddr = 127.0.0.1且fl4->flowi4_oif = loopback_dev,
if (res->type == RTN_LOCAL) { if (!fl4->saddr) { if (res->fi->fib_prefsrc) fl4->saddr = res->fi->fib_prefsrc; else fl4->saddr = fl4->daddr; }
接着创建路由缓存条目__mkroute_output。
直接看到rt_set_nexthop(rth, fl4->daddr, res, fnhe, fi, type, 0, do_cache);,

浙公网安备 33010602011771号