ip rule action

添加pbr 相关逻辑:https://www.cnblogs.com/codestack/p/15964315.html

  fib_rules_lookup 先判断规则的 action,然后执行 ops->action 回调;对 IPv4 而言,此时调用的是 fib4_rule_action

在没有使用l3mdev的情况下,使用rule->table作为table id,然后调用fib_get_table获取该表,最后通过fib_table_lookup进行路由项查找

  • FR_ACT_TO_TBL:

    • 对应 ip rule 命令中常见的行为,它表示将数据包导向某个特定的路由表。
    • 通常是用 lookup(或与之等价的 table)关键字指定路由表,例如:
      ip rule add from 192.168.1.0/24 table 100
      在这种情况下,rule->action 会是 FR_ACT_TO_TBL,表示匹配的包应该查找指定的路由表。
  • FR_ACT_UNREACHABLE:

    • 对应 ip rule 中的 unreachable 动作。
    • 当规则匹配时,内核返回 -ENETUNREACH,表示目的地不可达。
    • 示例:
      ip rule add from 192.168.1.0/24 unreachable
      这意味着如果数据包的源地址来自 192.168.1.0/24,内核会将该包丢弃,并返回“网络不可达”(ENETUNREACH)错误。
  • FR_ACT_PROHIBIT:

    • 对应 ip rule 中的 prohibit 动作。
    • 当规则匹配时,内核返回 -EACCES,表示访问被禁止。
    • 示例:
      ip rule add from 192.168.1.0/24 prohibit
      这意味着当数据包的源地址匹配 192.168.1.0/24 时,包将被丢弃,并返回“权限被禁止”(EACCES)错误。
  • FR_ACT_BLACKHOLE:

    • 对应 ip rule 中的 blackhole 动作。
    • 当规则匹配时,内核会简单地丢弃该包,不会向发送方回送错误通知;不过在 fib4_rule_action 中,该动作与 default 分支一样,路由查找的返回值是 -EINVAL。
    • 示例:
      ip rule add from 192.168.1.0/24 blackhole
      这意味着当数据包的源地址匹配 192.168.1.0/24 时,包会被悄悄丢弃,发送方不会收到任何通知。
  • default:

    • 如果 rule->action 不匹配任何已知的动作,则 default 情况返回 -EINVAL,表示无效的参数或操作。
    • 这个情况通常不会直接映射到 ip rule,而是表示遇到了未定义或无效的动作
  • FR_ACT_GOTO 
    •   在 Linux 内核中,FR_ACT_GOTO 的实现会修改规则的流控制。当当前规则命中并且操作为 FR_ACT_GOTO 时,内核并不会立即对该规则采取实际的路由决策,
    •   而是转到另一个指定的规则继续匹配。这可以允许用户定义一系列相互关联的规则。
      使用 ip rule 时可以通过 goto 关键字实现这个行为。
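      ip rule add from 192.168.1.0/24 pref 100 table 100
      ip rule add from 10.0.0.0/8 pref 50 goto 100

      这意味着源自 10.0.0.0/8 的数据包命中优先级为 50 的规则后,将跳转到优先级(pref)为 100 的规则继续匹配;goto 的参数是目标规则的优先级,而不是路由表号,并且只能跳转到优先级数值更大的规则(不允许向前跳转,以避免死循环)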

 

  • FR_ACT_TO_TBL 对应 ip rule 中指定查找路由表的行为(lookup / table)。
  • FR_ACT_UNREACHABLE 对应 ip rule add ... unreachable
  • FR_ACT_PROHIBIT 对应 ip rule add ... prohibit
  • FR_ACT_BLACKHOLE 对应 ip rule add ... blackhole
  • default 是处理未知或无效动作的情况,返回 -EINVAL 错误。
/*
 * Walk the policy-routing rules registered in @ops (under the RCU read lock)
 * and run the action of the first rule that matches the flow @fl.
 *
 * When a rule is accepted (its action returned anything other than -EAGAIN),
 * arg->rule is set to that rule; a reference is taken on it unless
 * FIB_LOOKUP_NOREF is set in arg->flags.
 *
 * Returns the value produced by ops->action for the accepted rule, or -ESRCH
 * when no rule was accepted or the rule's reference could not be taken.
 */
int fib_rules_lookup(struct fib_rules_ops *ops, struct flowi *fl,
             int flags, struct fib_lookup_arg *arg)
{
    struct fib_rule *rule;
    int err;

    rcu_read_lock();

    list_for_each_entry_rcu(rule, &ops->rules_list, list) {
jumped:
        /* The flow does not match this rule: try the next one. */
        if (!fib_rule_match(rule, ops, fl, flags, arg))
            continue;

        /* The rule matched; what happens next depends on its action. */
        if (rule->action == FR_ACT_GOTO) {
            /* Jump to another rule and continue matching from there. */
            struct fib_rule *target;

            target = rcu_dereference(rule->ctarget);
            if (target == NULL) {
                /*
                 * A goto may be configured before its target rule
                 * exists, so an unresolved target just moves on.
                 */
                continue;
            } else {
                rule = target;
                goto jumped;
            }
        } else if (rule->action == FR_ACT_NOP) {
            /* No-op rule: keep walking the list. */
            continue;
        } else {
            err = INDIRECT_CALL_MT(ops->action,
                           fib6_rule_action,
                           fib4_rule_action,
                           rule, fl, flags, arg);
        }

        /* A successful result can still be vetoed by the suppress hook. */
        if (!err && ops->suppress &&
            INDIRECT_CALL_MT(ops->suppress,
                     fib6_rule_suppress,
                     fib4_rule_suppress,
                     rule, arg))
            continue;

        /* -EAGAIN means "try the next rule"; anything else ends the walk. */
        if (err != -EAGAIN) {
            if ((arg->flags & FIB_LOOKUP_NOREF) ||
                likely(refcount_inc_not_zero(&rule->refcnt))) {
                arg->rule = rule;
                goto out;
            }
            /* Could not take a reference on the rule: report -ESRCH. */
            break;
        }
    }

    err = -ESRCH;
out:
    rcu_read_unlock();

    return err;
}

 

 

 在没有使用l3mdev的情况下,使用rule->table作为table id,然后调用fib_get_table获取该表,最后通过fib_table_lookup进行路由项查找。

 

int fib4_rule_action(struct fib_rule *rule,
                         struct flowi *flp, int flags,
                         struct fib_lookup_arg *arg)
{
    int err = -EAGAIN;
    struct fib_table *tbl;
    u32 tb_id;

    switch (rule->action) {
    case FR_ACT_TO_TBL:
        break;

    case FR_ACT_UNREACHABLE:
        return -ENETUNREACH;

    case FR_ACT_PROHIBIT:
        return -EACCES;

    case FR_ACT_BLACKHOLE:
    default:
        return -EINVAL;
    }

    rcu_read_lock();

    tb_id = fib_rule_get_table(rule, arg);
    tbl = fib_get_table(rule->fr_net, tb_id);
    if (tbl)
        err = fib_table_lookup(tbl, &flp->u.ip4,
                       (struct fib_result *)arg->result,
                       arg->flags);

    rcu_read_unlock();
    return err;
}

 

 fib_table_lookup这个函数太长 看不懂!!!只知道最后的结果是返回的err

判断条件基本都是看最后一个,也就是关于 scope 的比较(fa->fa_info->fib_scope < flp->flowi4_scope 时跳过该路由)。flp->flowi4_scope 初始化是 RT_SCOPE_UNIVERSE=0,然后在 ip_route_output_key_hash 中设置 fl4->flowi4_scope = ((tos & RTO_ONLINK) ? RT_SCOPE_LINK : RT_SCOPE_UNIVERSE);结果也还是 RT_SCOPE_UNIVERSE(因为 IP_TOS 默认为 0,没有置 RTO_ONLINK 标志)。此时任何路由的 fib_scope 都不会小于 0,所以这个条件对任何路由都满足。

如果设置了 MSG_DONTROUTE,则 tos 中会带上 RTO_ONLINK 标志,从而导致 scope = RT_SCOPE_LINK;这时只有 fib_scope 不小于 RT_SCOPE_LINK(253)的路由(如 link、host 范围)才能通过上面的 scope 检查。

往下走是叶子节点的链表遍历。fa(fib_alias)对应的是一条路由,多个 fib_alias 可以共享同一个 fib_info;fib_info 是真实的路由信息,比如设备、下一跳等,而其中的 fib_info->fib_nh[nhsel](下面的代码里通过 fib_info_nhc(fi, nhsel) 取得)代表了下一跳地址。这儿的 nhsel 是从 0 开始的下标,一般是 0,除非是多路径支持,不然一条路由一般只有一个下一跳

/* rtm_scope

   Really it is not scope, but sort of distance to the destination.
   NOWHERE are reserved for not existing destinations, HOST is our
   local addresses, LINK are destinations, located on directly attached
   link and UNIVERSE is everywhere in the Universe.

   Intermediate values are also possible f.e. interior routes
   could be assigned a value between UNIVERSE and LINK.
*/

enum rt_scope_t {
    RT_SCOPE_UNIVERSE = 0,      /* everywhere in the Universe */
    /* User defined values  */
    RT_SCOPE_SITE     = 200,
    RT_SCOPE_LINK     = 253,    /* destinations on a directly attached link */
    RT_SCOPE_HOST     = 254,    /* our local addresses */
    RT_SCOPE_NOWHERE  = 255,    /* reserved for not existing destinations */
};

 

    /* this line carries forward the xor from earlier in the function */
    index = key ^ n->key;

    /* Step 3: Process the leaf, if that fails fall back to backtracing */
    hlist_for_each_entry_rcu(fa, &n->leaf, fa_list) {
        struct fib_info *fi = fa->fa_info;
        struct fib_nh_common *nhc;
        int nhsel, err;

        /*
         * Skip aliases for which the differing key bits (index) do not fit
         * within fa_slen bits. fa_slen is KEYLENGTH minus the prefix
         * length, as the res->prefixlen assignment below shows.
         */
        if ((BITS_PER_LONG > KEYLENGTH) || (fa->fa_slen < KEYLENGTH)) {
            if (index >= (1ul << fa->fa_slen))
                continue;
        }
        /* An alias with a non-zero TOS only matches flows with that TOS. */
        if (fa->fa_tos && fa->fa_tos != flp->flowi4_tos)
            continue;
        /* Skip route info already marked dead. */
        if (fi->fib_dead)
            continue;
        /* Skip routes whose scope value is below the one the flow requests. */
        if (fa->fa_info->fib_scope < flp->flowi4_scope)
            continue;
        fib_alias_accessed(fa);
        /* Result code is determined by the route type via fib_props[]. */
        err = fib_props[fa->fa_type].error;
        if (unlikely(err < 0)) {
            /*
             * Route types with a negative error end the lookup right here.
             * out_reject is also the target for blackhole nexthops below.
             */
out_reject:
#ifdef CONFIG_IP_FIB_TRIE_STATS
            this_cpu_inc(stats->semantic_match_passed);
#endif
            trace_fib_table_lookup(tb->tb_id, flp, NULL, err);
            return err;
        }
        /* Skip routes flagged RTNH_F_DEAD. */
        if (fi->fib_flags & RTNH_F_DEAD)
            continue;

        /*
         * fi->nh is set: pick the nexthop through the nexthop_* helpers
         * rather than the per-path loop below.
         */
        if (unlikely(fi->nh)) {
            if (nexthop_is_blackhole(fi->nh)) {
                err = fib_props[RTN_BLACKHOLE].error;
                goto out_reject;
            }

            nhc = nexthop_get_nhc_lookup(fi->nh, fib_flags, flp,
                             &nhsel);
            if (nhc)
                goto set_result;
            /* The miss label lies outside this excerpt. */
            goto miss;
        }

        /*
         * Try every path of the route (nhsel is a 0-based index); the
         * first one accepted by fib_lookup_good_nhc() is used.
         */
        for (nhsel = 0; nhsel < fib_info_num_path(fi); nhsel++) {
            nhc = fib_info_nhc(fi, nhsel);

            if (!fib_lookup_good_nhc(nhc, fib_flags, flp))
                continue;
set_result:
            /* Take a reference on fi unless the caller asked for NOREF. */
            if (!(fib_flags & FIB_LOOKUP_NOREF))
                refcount_inc(&fi->fib_clntref);

            /* Fill in the lookup result for the caller. */
            res->prefix = htonl(n->key);
            res->prefixlen = KEYLENGTH - fa->fa_slen;
            res->nh_sel = nhsel;
            res->nhc = nhc;
            res->type = fa->fa_type;
            res->scope = fi->fib_scope;
            res->fi = fi;
            res->table = tb;
            res->fa_head = &n->leaf;
#ifdef CONFIG_IP_FIB_TRIE_STATS
            this_cpu_inc(stats->semantic_match_passed);
#endif
            trace_fib_table_lookup(tb->tb_id, flp, nhc, err);

            /* err is non-negative here: negative values returned above. */
            return err;
        }

 

需要注意的是:返回的 err 有哪些取值?

err = fib_props[fa->fa_type].error;
const struct fib_prop fib_props[RTN_MAX + 1] = {
    [RTN_UNSPEC] = {
        .error    = 0,
        .scope    = RT_SCOPE_NOWHERE,
    },
    [RTN_UNICAST] = {
        .error    = 0,
        .scope    = RT_SCOPE_UNIVERSE,
    },
    [RTN_LOCAL] = {
        .error    = 0,
        .scope    = RT_SCOPE_HOST,
    },
    [RTN_BROADCAST] = {
        .error    = 0,
        .scope    = RT_SCOPE_LINK,
    },
    [RTN_ANYCAST] = {
        .error    = 0,
        .scope    = RT_SCOPE_LINK,
    },
    [RTN_MULTICAST] = {
        .error    = 0,
        .scope    = RT_SCOPE_UNIVERSE,
    },
    [RTN_BLACKHOLE] = {
        .error    = -EINVAL,
        .scope    = RT_SCOPE_UNIVERSE,
    },
    [RTN_UNREACHABLE] = {
        .error    = -EHOSTUNREACH,
        .scope    = RT_SCOPE_UNIVERSE,
    },
    [RTN_PROHIBIT] = {
        .error    = -EACCES,
        .scope    = RT_SCOPE_UNIVERSE,
    },
    [RTN_THROW] = {
        .error    = -EAGAIN,
        .scope    = RT_SCOPE_UNIVERSE,
    },
    [RTN_NAT] = {
        .error    = -EINVAL,
        .scope    = RT_SCOPE_NOWHERE,
    },
    [RTN_XRESOLVE] = {
        .error    = -EINVAL,
        .scope    = RT_SCOPE_NOWHERE,
    },
};

 

对于访问lo接口流量:

local 127.0.0.0/8 dev lo proto kernel scope host src 127.0.0.1 
local 127.0.0.1 dev lo proto kernel scope host src 127.0.0.1 

 

一般命中上述路由;最后返回的err = 0,又因为res->type = RTN_LOCAL,且fl4->saddr = 0

所以 fl4->saddr = 127.0.0.1(取自路由的 src,即 fib_prefsrc),fl4->flowi4_oif = loopback_dev

 

    /* The lookup resolved to a local route. */
    if (res->type == RTN_LOCAL) {
        /* Only choose a source address when the flow has none yet. */
        if (!fl4->saddr) {
            /* Use the route's preferred source if set, else the destination. */
            if (res->fi->fib_prefsrc)
                fl4->saddr = res->fi->fib_prefsrc;
            else
                fl4->saddr = fl4->daddr;
        }

 

 

接着创建路由缓存条目__mkroute_output

直接看到rt_set_nexthop(rth, fl4->daddr, res, fnhe, fi, type, 0, do_cache);

 

 

posted @ 2024-10-12 16:27  codestacklinuxer  阅读(26)  评论(0)    收藏  举报