openvswitch 源码分析 OVS_ACTION_ATTR_HASH action

1. 在 ovs_dp_process_packet 中查找内核缓存的流表;命中后调用 ovs_execute_actions -> do_execute_actions 依次执行该流的各个 action,其中一种 action 是 OVS_ACTION_ATTR_HASH。

 1 /* Must be called with rcu_read_lock. */
 2 void ovs_dp_process_packet(struct sk_buff *skb, struct sw_flow_key *key)
 3 {
 4     const struct vport *p = OVS_CB(skb)->input_vport;
 5     struct datapath *dp = p->dp;
 6     struct sw_flow *flow;
 7     struct sw_flow_actions *sf_acts;
 8     struct dp_stats_percpu *stats;
 9     u64 *stats_counter;
10     u32 n_mask_hit;
11 
12     stats = this_cpu_ptr(dp->stats_percpu);
13 
14     /* Look up flow. */
15     flow = ovs_flow_tbl_lookup_stats(&dp->table, key, skb_get_hash(skb),
16                      &n_mask_hit);
17     if (unlikely(!flow)) {
18         struct dp_upcall_info upcall;
19         int error;
20 
21         memset(&upcall, 0, sizeof(upcall));
22         upcall.cmd = OVS_PACKET_CMD_MISS;
23         upcall.portid = ovs_vport_find_upcall_portid(p, skb);
24         upcall.mru = OVS_CB(skb)->mru;
25         error = ovs_dp_upcall(dp, skb, key, &upcall, 0);
26         if (unlikely(error))
27             kfree_skb(skb);
28         else
29             consume_skb(skb);
30         stats_counter = &stats->n_missed;
31         goto out;
32     }
33 
34     ovs_flow_stats_update(flow, key->tp.flags, skb);
35     sf_acts = rcu_dereference(flow->sf_acts);
36     ovs_execute_actions(dp, skb, sf_acts, key);
37 
38     stats_counter = &stats->n_hit;
39 
40 out:
41     /* Update datapath statistics. */
42     u64_stats_update_begin(&stats->syncp);
43     (*stats_counter)++;
44     stats->n_mask_hit += n_mask_hit;
45     u64_stats_update_end(&stats->syncp);
46 }
 1 /* Execute a list of actions against 'skb'. */
 2 int ovs_execute_actions(struct datapath *dp, struct sk_buff *skb,
 3             const struct sw_flow_actions *acts,
 4             struct sw_flow_key *key)
 5 {
 6     int err, level;
 7 
 8     level = __this_cpu_inc_return(exec_actions_level);
 9     if (unlikely(level > OVS_RECURSION_LIMIT)) {
10         net_crit_ratelimited("ovs: recursion limit reached on datapath %s, probable configuration error\n",
11                      ovs_dp_name(dp));
12         kfree_skb(skb);
13         err = -ENETDOWN;
14         goto out;
15     }
16 
17     OVS_CB(skb)->acts_origlen = acts->orig_len;
18     err = do_execute_actions(dp, skb, key,
19                  acts->actions, acts->actions_len);
20 
21     if (level == 1)
22         process_deferred_actions(dp);
23 
24 out:
25     __this_cpu_dec(exec_actions_level);
26     return err;
27 }

2.do_execute_actions中会调用execute_hash

 1 static void execute_hash(struct sk_buff *skb, struct sw_flow_key *key,
 2              const struct nlattr *attr)
 3 {
 4     struct ovs_action_hash *hash_act = nla_data(attr);
 5     u32 hash = 0;
 6 
 7     /* OVS_HASH_ALG_L4 is the only possible hash algorithm.  */
 8     hash = skb_get_hash(skb);
 9     hash = jhash_1word(hash, hash_act->hash_basis);
10     if (!hash)
11         hash = 0x1;
12 
13     key->ovs_flow_hash = hash;
14 }

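3. 该 action 只修改了 key 的 ovs_flow_hash 成员。从该成员被使用的位置反向追踪,可以看到 queue_userspace_packet 在把报文上送给用户态进程时会用到它:它通过 ovs_nla_put_key -> __ovs_nla_put_key 把 ovs_flow_hash 以 OVS_KEY_ATTR_DP_HASH 属性写入 netlink 消息。下面就看一下 queue_userspace_packet 是如何用到该成员的。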

  1 static int queue_userspace_packet(struct datapath *dp, struct sk_buff *skb,
  2                   const struct sw_flow_key *key,
  3                   const struct dp_upcall_info *upcall_info,
  4                   uint32_t cutlen)
  5 {
  6     struct ovs_header *upcall;
  7     struct sk_buff *nskb = NULL;
  8     struct sk_buff *user_skb = NULL; /* to be queued to userspace */
  9     struct nlattr *nla;
 10     size_t len;
 11     unsigned int hlen;
 12     int err, dp_ifindex;
 13 
 14     dp_ifindex = get_dpifindex(dp);
 15     if (!dp_ifindex)
 16         return -ENODEV;
 17 
 18     if (skb_vlan_tag_present(skb)) {
 19         nskb = skb_clone(skb, GFP_ATOMIC);
 20         if (!nskb)
 21             return -ENOMEM;
 22 
 23         nskb = __vlan_hwaccel_push_inside(nskb);
 24         if (!nskb)
 25             return -ENOMEM;
 26 
 27         skb = nskb;
 28     }
 29 
 30     if (nla_attr_size(skb->len) > USHRT_MAX) {
 31         err = -EFBIG;
 32         goto out;
 33     }
 34 
 35     /* Complete checksum if needed */
 36     if (skb->ip_summed == CHECKSUM_PARTIAL &&
 37         (err = skb_csum_hwoffload_help(skb, 0)))
 38         goto out;
 39 
 40     /* Older versions of OVS user space enforce alignment of the last
 41      * Netlink attribute to NLA_ALIGNTO which would require extensive
 42      * padding logic. Only perform zerocopy if padding is not required.
 43      */
 44     if (dp->user_features & OVS_DP_F_UNALIGNED)
 45         hlen = skb_zerocopy_headlen(skb);
 46     else
 47         hlen = skb->len;
 48 
 49     len = upcall_msg_size(upcall_info, hlen - cutlen,
 50                   OVS_CB(skb)->acts_origlen);
 51     user_skb = genlmsg_new(len, GFP_ATOMIC);
 52     if (!user_skb) {
 53         err = -ENOMEM;
 54         goto out;
 55     }
 56 
 57     upcall = genlmsg_put(user_skb, 0, 0, &dp_packet_genl_family,
 58                  0, upcall_info->cmd);
 59     upcall->dp_ifindex = dp_ifindex;
 60 
 61     err = ovs_nla_put_key(key, key, OVS_PACKET_ATTR_KEY, false, user_skb);
 62     BUG_ON(err);
 63 
 64     if (upcall_info->userdata)
 65         __nla_put(user_skb, OVS_PACKET_ATTR_USERDATA,
 66               nla_len(upcall_info->userdata),
 67               nla_data(upcall_info->userdata));
 68 
 69 
 70     if (upcall_info->egress_tun_info) {
 71         nla = nla_nest_start(user_skb, OVS_PACKET_ATTR_EGRESS_TUN_KEY);
 72         err = ovs_nla_put_tunnel_info(user_skb,
 73                           upcall_info->egress_tun_info);
 74         BUG_ON(err);
 75         nla_nest_end(user_skb, nla);
 76     }
 77 
 78     if (upcall_info->actions_len) {
 79         nla = nla_nest_start(user_skb, OVS_PACKET_ATTR_ACTIONS);
 80         err = ovs_nla_put_actions(upcall_info->actions,
 81                       upcall_info->actions_len,
 82                       user_skb);
 83         if (!err)
 84             nla_nest_end(user_skb, nla);
 85         else
 86             nla_nest_cancel(user_skb, nla);
 87     }
 88 
 89     /* Add OVS_PACKET_ATTR_MRU */
 90     if (upcall_info->mru) {
 91         if (nla_put_u16(user_skb, OVS_PACKET_ATTR_MRU,
 92                 upcall_info->mru)) {
 93             err = -ENOBUFS;
 94             goto out;
 95         }
 96         pad_packet(dp, user_skb);
 97     }
 98 
 99     /* Add OVS_PACKET_ATTR_LEN when packet is truncated */
100     if (cutlen > 0) {
101         if (nla_put_u32(user_skb, OVS_PACKET_ATTR_LEN,
102                 skb->len)) {
103             err = -ENOBUFS;
104             goto out;
105         }
106         pad_packet(dp, user_skb);
107     }
108 
109     /* Only reserve room for attribute header, packet data is added
110      * in skb_zerocopy()
111      */
112     if (!(nla = nla_reserve(user_skb, OVS_PACKET_ATTR_PACKET, 0))) {
113         err = -ENOBUFS;
114         goto out;
115     }
116     nla->nla_len = nla_attr_size(skb->len - cutlen);
117 
118     err = skb_zerocopy(user_skb, skb, skb->len - cutlen, hlen);
119     if (err)
120         goto out;
121 
122     /* Pad OVS_PACKET_ATTR_PACKET if linear copy was performed */
123     pad_packet(dp, user_skb);
124 
125     ((struct nlmsghdr *) user_skb->data)->nlmsg_len = user_skb->len;
126 
127     err = genlmsg_unicast(ovs_dp_get_net(dp), user_skb, upcall_info->portid);
128     user_skb = NULL;
129 out:
130     if (err)
131         skb_tx_error(skb);
132     kfree_skb(user_skb);
133     kfree_skb(nskb);
134     return err;
135 }

4.ovs_nla_put_key函数

 1 int ovs_nla_put_key(const struct sw_flow_key *swkey,
 2             const struct sw_flow_key *output, int attr, bool is_mask,
 3             struct sk_buff *skb)
 4 {
 5     int err;
 6     struct nlattr *nla;
 7 
 8     nla = nla_nest_start(skb, attr);
 9     if (!nla)
10         return -EMSGSIZE;
11     err = __ovs_nla_put_key(swkey, output, is_mask, skb);
12     if (err)
13         return err;
14     nla_nest_end(skb, nla);
15 
16     return 0;
17 }

5.__ovs_nla_put_key函数

  /*
   * Serialize a flow key into 'skb' as a run of OVS_KEY_ATTR_* netlink
   * attributes.
   *
   * 'swkey' decides WHICH attributes are emitted: the tunnel, in_port,
   * MAC-protocol, VLAN and L3/L4 tests below all read it.  'output'
   * supplies the VALUES stored in those attributes.  'is_mask' selects the
   * mask encoding where it differs from the key encoding (tunnel presence,
   * in_port upper bits, VLAN helper, 802.2 ethertype, NSH helper).
   *
   * Returns 0 on success, or -EMSGSIZE when any put/reserve helper reports
   * failure.  Attributes already written to 'skb' are not rolled back here.
   */
  1 static int __ovs_nla_put_key(const struct sw_flow_key *swkey,
  2                  const struct sw_flow_key *output, bool is_mask,
  3                  struct sk_buff *skb)
  4 {
  5     struct ovs_key_ethernet *eth_key;
  6     struct nlattr *nla;
        /* Outer / inner OVS_KEY_ATTR_ENCAP nests; closed at 'unencap'. */
  7     struct nlattr *encap = NULL;
  8     struct nlattr *in_encap = NULL;
  9 
 10     if (nla_put_u32(skb, OVS_KEY_ATTR_RECIRC_ID, output->recirc_id))
 11         goto nla_put_failure;
 12 
        /* ovs_flow_hash is exported to the receiver as OVS_KEY_ATTR_DP_HASH. */
 13     if (nla_put_u32(skb, OVS_KEY_ATTR_DP_HASH, output->ovs_flow_hash))
 14         goto nla_put_failure;
 15 
 16     if (nla_put_u32(skb, OVS_KEY_ATTR_PRIORITY, output->phy.priority))
 17         goto nla_put_failure;
 18 
        /* Tunnel attributes: only when the key has a tunnel protocol, but
         * always when a mask is being written.
         */
 19     if ((swkey->tun_proto || is_mask)) {
 20         const void *opts = NULL;
 21 
 22         if (output->tun_key.tun_flags & TUNNEL_OPTIONS_PRESENT)
 23             opts = TUN_METADATA_OPTS(output, swkey->tun_opts_len);
 24 
 25         if (ip_tun_to_nlattr(skb, &output->tun_key, opts,
 26                      swkey->tun_opts_len, swkey->tun_proto))
 27             goto nla_put_failure;
 28     }
 29 
        /* in_port == DP_MAX_PORTS: nothing is emitted for a key; for a mask
         * an all-ones IN_PORT is emitted only if the mask's in_port is 0xffff.
         */
 30     if (swkey->phy.in_port == DP_MAX_PORTS) {
 31         if (is_mask && (output->phy.in_port == 0xffff))
 32             if (nla_put_u32(skb, OVS_KEY_ATTR_IN_PORT, 0xffffffff))
 33                 goto nla_put_failure;
 34     } else {
            /* The attribute is 32 bits wide; for masks its upper 16 bits are
             * set to all-ones, for keys they are zero.
             */
 35         u16 upper_u16;
 36         upper_u16 = !is_mask ? 0 : 0xffff;
 37 
 38         if (nla_put_u32(skb, OVS_KEY_ATTR_IN_PORT,
 39                 (upper_u16 << 16) | output->phy.in_port))
 40             goto nla_put_failure;
 41     }
 42 
 43     if (nla_put_u32(skb, OVS_KEY_ATTR_SKB_MARK, output->phy.skb_mark))
 44         goto nla_put_failure;
 45 
 46     if (ovs_ct_put_key(swkey, output, skb))
 47         goto nla_put_failure;
 48 
        /* Ethernet header and VLAN nesting are only written for keys whose
         * MAC protocol is Ethernet.
         */
 49     if (ovs_key_mac_proto(swkey) == MAC_PROTO_ETHERNET) {
 50         nla = nla_reserve(skb, OVS_KEY_ATTR_ETHERNET, sizeof(*eth_key));
 51         if (!nla)
 52             goto nla_put_failure;
 53 
 54         eth_key = nla_data(nla);
 55         ether_addr_copy(eth_key->eth_src, output->eth.src);
 56         ether_addr_copy(eth_key->eth_dst, output->eth.dst);
 57 
 58         if (swkey->eth.vlan.tci || eth_type_vlan(swkey->eth.type)) {
 59             if (ovs_nla_put_vlan(skb, &output->eth.vlan, is_mask))
 60                 goto nla_put_failure;
                /* NOTE(review): the nla_nest_start() result is not checked;
                 * a NULL 'encap' just means no nla_nest_end() at 'unencap'.
                 */
 61             encap = nla_nest_start(skb, OVS_KEY_ATTR_ENCAP);
 62             if (!swkey->eth.vlan.tci)
 63                 goto unencap;
 64 
 65             if (swkey->eth.cvlan.tci || eth_type_vlan(swkey->eth.type)) {
 66                 if (ovs_nla_put_vlan(skb, &output->eth.cvlan, is_mask))
 67                     goto nla_put_failure;
                    /* Same as above: result of nla_nest_start() unchecked. */
 68                 in_encap = nla_nest_start(skb, OVS_KEY_ATTR_ENCAP);
 69                 if (!swkey->eth.cvlan.tci)
 70                     goto unencap;
 71             }
 72         }
 73 
 74         if (swkey->eth.type == htons(ETH_P_802_2)) {
 75             /*
 76              * Ethertype 802.2 is represented in the netlink with omitted
 77              * OVS_KEY_ATTR_ETHERTYPE in the flow key attribute, and
 78              * 0xffff in the mask attribute.  Ethertype can also
 79              * be wildcarded.
 80              */
 81             if (is_mask && output->eth.type)
 82                 if (nla_put_be16(skb, OVS_KEY_ATTR_ETHERTYPE,
 83                             output->eth.type))
 84                     goto nla_put_failure;
 85             goto unencap;
 86         }
 87     }
 88 
 89     if (nla_put_be16(skb, OVS_KEY_ATTR_ETHERTYPE, output->eth.type))
 90         goto nla_put_failure;
 91 
 92     if (eth_type_vlan(swkey->eth.type)) {
 93         /* There are 3 VLAN tags, we don't know anything about the rest
 94          * of the packet, so truncate here.
 95          */
 96         WARN_ON_ONCE(!(encap && in_encap));
 97         goto unencap;
 98     }
 99 
        /* L3 attribute, chosen by the key's ethertype.  Ethertypes not
         * listed here get no L3 attribute.
         */
100     if (swkey->eth.type == htons(ETH_P_IP)) {
101         struct ovs_key_ipv4 *ipv4_key;
102 
103         nla = nla_reserve(skb, OVS_KEY_ATTR_IPV4, sizeof(*ipv4_key));
104         if (!nla)
105             goto nla_put_failure;
106         ipv4_key = nla_data(nla);
107         ipv4_key->ipv4_src = output->ipv4.addr.src;
108         ipv4_key->ipv4_dst = output->ipv4.addr.dst;
109         ipv4_key->ipv4_proto = output->ip.proto;
110         ipv4_key->ipv4_tos = output->ip.tos;
111         ipv4_key->ipv4_ttl = output->ip.ttl;
112         ipv4_key->ipv4_frag = output->ip.frag;
113     } else if (swkey->eth.type == htons(ETH_P_IPV6)) {
114         struct ovs_key_ipv6 *ipv6_key;
115 
116         nla = nla_reserve(skb, OVS_KEY_ATTR_IPV6, sizeof(*ipv6_key));
117         if (!nla)
118             goto nla_put_failure;
119         ipv6_key = nla_data(nla);
120         memcpy(ipv6_key->ipv6_src, &output->ipv6.addr.src,
121                 sizeof(ipv6_key->ipv6_src));
122         memcpy(ipv6_key->ipv6_dst, &output->ipv6.addr.dst,
123                 sizeof(ipv6_key->ipv6_dst));
124         ipv6_key->ipv6_label = output->ipv6.label;
125         ipv6_key->ipv6_proto = output->ip.proto;
126         ipv6_key->ipv6_tclass = output->ip.tos;
127         ipv6_key->ipv6_hlimit = output->ip.ttl;
128         ipv6_key->ipv6_frag = output->ip.frag;
129     } else if (swkey->eth.type == htons(ETH_P_NSH)) {
130         if (nsh_key_to_nlattr(&output->nsh, is_mask, skb))
131             goto nla_put_failure;
132     } else if (swkey->eth.type == htons(ETH_P_ARP) ||
133            swkey->eth.type == htons(ETH_P_RARP)) {
134         struct ovs_key_arp *arp_key;
135 
136         nla = nla_reserve(skb, OVS_KEY_ATTR_ARP, sizeof(*arp_key));
137         if (!nla)
138             goto nla_put_failure;
139         arp_key = nla_data(nla);
            /* Zero the whole struct first; only the fields below are set. */
140         memset(arp_key, 0, sizeof(struct ovs_key_arp));
141         arp_key->arp_sip = output->ipv4.addr.src;
142         arp_key->arp_tip = output->ipv4.addr.dst;
            /* For ARP/RARP the opcode is taken from ip.proto. */
143         arp_key->arp_op = htons(output->ip.proto);
144         ether_addr_copy(arp_key->arp_sha, output->ipv4.arp.sha);
145         ether_addr_copy(arp_key->arp_tha, output->ipv4.arp.tha);
146     } else if (eth_p_mpls(swkey->eth.type)) {
147         struct ovs_key_mpls *mpls_key;
148 
149         nla = nla_reserve(skb, OVS_KEY_ATTR_MPLS, sizeof(*mpls_key));
150         if (!nla)
151             goto nla_put_failure;
152         mpls_key = nla_data(nla);
153         mpls_key->mpls_lse = output->mpls.top_lse;
154     }
155 
        /* L4 attribute: only for IPv4/IPv6 keys whose fragment type is not
         * OVS_FRAG_TYPE_LATER.
         */
156     if ((swkey->eth.type == htons(ETH_P_IP) ||
157          swkey->eth.type == htons(ETH_P_IPV6)) &&
158          swkey->ip.frag != OVS_FRAG_TYPE_LATER) {
159 
160         if (swkey->ip.proto == IPPROTO_TCP) {
161             struct ovs_key_tcp *tcp_key;
162 
163             nla = nla_reserve(skb, OVS_KEY_ATTR_TCP, sizeof(*tcp_key));
164             if (!nla)
165                 goto nla_put_failure;
166             tcp_key = nla_data(nla);
167             tcp_key->tcp_src = output->tp.src;
168             tcp_key->tcp_dst = output->tp.dst;
                /* TCP flags travel in their own attribute. */
169             if (nla_put_be16(skb, OVS_KEY_ATTR_TCP_FLAGS,
170                      output->tp.flags))
171                 goto nla_put_failure;
172         } else if (swkey->ip.proto == IPPROTO_UDP) {
173             struct ovs_key_udp *udp_key;
174 
175             nla = nla_reserve(skb, OVS_KEY_ATTR_UDP, sizeof(*udp_key));
176             if (!nla)
177                 goto nla_put_failure;
178             udp_key = nla_data(nla);
179             udp_key->udp_src = output->tp.src;
180             udp_key->udp_dst = output->tp.dst;
181         } else if (swkey->ip.proto == IPPROTO_SCTP) {
182             struct ovs_key_sctp *sctp_key;
183 
184             nla = nla_reserve(skb, OVS_KEY_ATTR_SCTP, sizeof(*sctp_key));
185             if (!nla)
186                 goto nla_put_failure;
187             sctp_key = nla_data(nla);
188             sctp_key->sctp_src = output->tp.src;
189             sctp_key->sctp_dst = output->tp.dst;
190         } else if (swkey->eth.type == htons(ETH_P_IP) &&
191                swkey->ip.proto == IPPROTO_ICMP) {
192             struct ovs_key_icmp *icmp_key;
193 
194             nla = nla_reserve(skb, OVS_KEY_ATTR_ICMP, sizeof(*icmp_key));
195             if (!nla)
196                 goto nla_put_failure;
197             icmp_key = nla_data(nla);
                /* ICMP type/code are read from tp.src/tp.dst via ntohs(). */
198             icmp_key->icmp_type = ntohs(output->tp.src);
199             icmp_key->icmp_code = ntohs(output->tp.dst);
200         } else if (swkey->eth.type == htons(ETH_P_IPV6) &&
201                swkey->ip.proto == IPPROTO_ICMPV6) {
202             struct ovs_key_icmpv6 *icmpv6_key;
203 
204             nla = nla_reserve(skb, OVS_KEY_ATTR_ICMPV6,
205                         sizeof(*icmpv6_key));
206             if (!nla)
207                 goto nla_put_failure;
208             icmpv6_key = nla_data(nla);
209             icmpv6_key->icmpv6_type = ntohs(output->tp.src);
210             icmpv6_key->icmpv6_code = ntohs(output->tp.dst);
211 
                /* Neighbour solicitation/advertisement additionally carry an
                 * OVS_KEY_ATTR_ND attribute.  Note the test uses the value
                 * just written from 'output', not 'swkey'.
                 */
212             if (icmpv6_key->icmpv6_type == NDISC_NEIGHBOUR_SOLICITATION ||
213                 icmpv6_key->icmpv6_type == NDISC_NEIGHBOUR_ADVERTISEMENT) {
214                 struct ovs_key_nd *nd_key;
215 
216                 nla = nla_reserve(skb, OVS_KEY_ATTR_ND, sizeof(*nd_key));
217                 if (!nla)
218                     goto nla_put_failure;
219                 nd_key = nla_data(nla);
220                 memcpy(nd_key->nd_target, &output->ipv6.nd.target,
221                             sizeof(nd_key->nd_target));
222                 ether_addr_copy(nd_key->nd_sll, output->ipv6.nd.sll);
223                 ether_addr_copy(nd_key->nd_tll, output->ipv6.nd.tll);
224             }
225         }
226     }
227 
    /* Close any ENCAP nests that were opened, innermost first. */
228 unencap:
229     if (in_encap)
230         nla_nest_end(skb, in_encap);
231     if (encap)
232         nla_nest_end(skb, encap);
233 
234     return 0;
235 
    /* Any failed put/reserve above lands here. */
236 nla_put_failure:
237     return -EMSGSIZE;
238 }

 

posted @ 2019-03-18 09:46  salami_china  阅读(646)  评论(0)    收藏  举报