网络设备之net_device结构与操作

net_device 结构是二层(链路层)中一个非常重要的结构,成员很多,包含硬件信息、接口信息、其他辅助信息以及设备操作函数等。下面对其中重要字段的含义进行了标注;目前仍在阅读代码的过程中,部分字段的含义会在后续补充或更新。

  1 struct net_device {    /* Layer-2 network device descriptor: hardware info, interface info, auxiliary data and operation tables */
  2     /* Device name, e.g. eth0 */
  3     char            name[IFNAMSIZ];
  4     /* Node in the name hash list */
  5     struct hlist_node    name_hlist;
  6     char             *ifalias;
  7     /*
  8      *    I/O specific fields
  9      *    FIXME: Merge these and struct ifmap into one
 10      */
 11     /* 
 12         Shared memory used by the device to communicate with the kernel.
 13         It is initialized and accessed only inside the device driver.
 14     */
 15     unsigned long        mem_end;
 16     unsigned long        mem_start;
 17 
 18     /* Start address at which the device's own memory is mapped into I/O memory */
 19     unsigned long        base_addr;
 20 
 21     /*
 22         Interrupt number the device uses to talk to the kernel; it may be shared by several devices.
 23         Drivers allocate it with request_irq() and release it with free_irq().
 24     */
 25     int            irq;
 26 
 27     /* Number of carrier (link state) changes detected */
 28     atomic_t        carrier_changes;
 29 
 30     /*
 31      *    Some hardware also needs these fields (state,dev_list,
 32      *    napi_list,unreg_list,close_list) but they are not
 33      *    part of the usual set specified in Space.c.
 34      */
 35 
 36     /* 
 37         Set of flags used by the network queueing subsystem.
 38         The individual bits are named __LINK_STATE_xxx.
 39     */
 40     unsigned long        state;
 41 
 42     struct list_head    dev_list;
 43     struct list_head    napi_list;
 44     struct list_head    unreg_list;
 45     struct list_head    close_list;
 46 
 47     /* List of handlers for all protocols on this device */
 48     struct list_head    ptype_all;
 49     /* List of protocol-specific handlers on this device */
 50     struct list_head    ptype_specific;
 51 
 52     struct {
 53         struct list_head upper;
 54         struct list_head lower;
 55     } adj_list;
 56 
 57     /* 
 58         Stores additional device capabilities.
 59         Reports what the adapter can do so the CPU side can interact with it.
 60         Individual capabilities are identified by NETIF_F_XXX flags.
 61     */
 62     netdev_features_t    features;
 63     netdev_features_t    hw_features;
 64     netdev_features_t    wanted_features;
 65     netdev_features_t    vlan_features;
 66     netdev_features_t    hw_enc_features;
 67     netdev_features_t    mpls_features;
 68     netdev_features_t    gso_partial_features;
 69 
 70     /* Network device index */
 71     int            ifindex;
 72 
 73     /* Device group; every device belongs to group 0 by default */
 74     int            group;
 75 
 76     struct net_device_stats    stats;
 77 
 78     atomic_long_t        rx_dropped;
 79     atomic_long_t        tx_dropped;
 80     atomic_long_t        rx_nohandler;
 81 
 82 #ifdef CONFIG_WIRELESS_EXT
 83     const struct iw_handler_def *wireless_handlers;
 84     struct iw_public_data    *wireless_data;
 85 #endif
 86     /* Device operations table */
 87     const struct net_device_ops *netdev_ops;
 88     /* ethtool operations table */
 89     const struct ethtool_ops *ethtool_ops;
 90 #ifdef CONFIG_NET_SWITCHDEV
 91     const struct switchdev_ops *switchdev_ops;
 92 #endif
 93 #ifdef CONFIG_NET_L3_MASTER_DEV
 94     const struct l3mdev_ops    *l3mdev_ops;
 95 #endif
 96 #if IS_ENABLED(CONFIG_IPV6)
 97     const struct ndisc_ops *ndisc_ops;
 98 #endif
 99 
100 #ifdef CONFIG_XFRM
101     const struct xfrmdev_ops *xfrmdev_ops;
102 #endif
103 
104     /* Link-layer header operations, e.g. header caching and validation */
105     const struct header_ops *header_ops;
106 
107     /* Interface flags, IFF_XXX, e.g. IFF_UP */
108     unsigned int        flags;
109 
110     /* 
111         Flags that are not visible to user space.
112         Used by VLAN and bridge virtual devices.
113     */
114     unsigned int        priv_flags;
115 
116     /* Rarely used; kept for compatibility */
117     unsigned short        gflags;
118 
119     /* Alignment padding. NOTE(review): kernel-doc describes this as how much padding alloc_netdev() added - confirm */
120     unsigned short        padded;
121 
122     /* Related to IfOperStatus in the interface group MIB */
123     unsigned char        operstate;
124     unsigned char        link_mode;
125 
126     /* 
127         Port type used by the interface
128     */
129     unsigned char        if_port;
130 
131     /*
132         DMA channel used by the device.
133         Not every device can use DMA; some buses do not support it.
134     */
135     unsigned char        dma;
136 
137     /*
138         Maximum transmission unit: the largest frame size the device can handle.
139         Ethernet: 1500
140     */
141     unsigned int        mtu;
142     /* Minimum MTU; Ethernet: 68 */
143     unsigned int        min_mtu;
144     /* Maximum MTU; Ethernet: 65535. NOTE(review): ether_setup() appears to default max_mtu to ETH_DATA_LEN (1500) - confirm */
145     unsigned int        max_mtu;
146 
147     /*     Device type.
148         The ARP module uses type to determine the interface's hardware address type.
149         Ethernet interfaces use ARPHRD_ETHER.
150     */
151     unsigned short        type;
152     /* 
153         Link-layer header length.
154         The Ethernet header is ETH_HLEN = 14 bytes.
155     */
156     unsigned short        hard_header_len;
157     unsigned char        min_header_len;
158 
159     /* Required headroom (and tailroom) */
160     unsigned short        needed_headroom;
161     unsigned short        needed_tailroom;
162 
163     /* Interface address info. */
164     /* Hardware address, normally read from the hardware during initialization */
165     unsigned char        perm_addr[MAX_ADDR_LEN];
166     unsigned char        addr_assign_type;
167     /* Hardware address length */
168     unsigned char        addr_len;
169     unsigned short        neigh_priv_len;
170     unsigned short          dev_id;
171     unsigned short          dev_port;
172     spinlock_t        addr_list_lock;
173     /* How the device name was assigned, e.g. NET_NAME_UNKNOWN */
174     unsigned char        name_assign_type;
175     bool            uc_promisc;
176     struct netdev_hw_addr_list    uc;
177     struct netdev_hw_addr_list    mc;
178     struct netdev_hw_addr_list    dev_addrs;
179 
180 #ifdef CONFIG_SYSFS
181     struct kset        *queues_kset;
182 #endif
183     /* Number of times promiscuous mode has been enabled */
184     unsigned int        promiscuity;
185 
186     /* When non-zero, the device listens to all multicast addresses */
187     unsigned int        allmulti;
188 
189 
190     /* Protocol-specific pointers */
191 /* (one private-data pointer per protocol) */
192 #if IS_ENABLED(CONFIG_VLAN_8021Q)
193     struct vlan_info __rcu    *vlan_info;
194 #endif
195 #if IS_ENABLED(CONFIG_NET_DSA)
196     struct dsa_switch_tree    *dsa_ptr;
197 #endif
198 #if IS_ENABLED(CONFIG_TIPC)
199     struct tipc_bearer __rcu *tipc_ptr;
200 #endif
201     void             *atalk_ptr;
202     /* ip_ptr points to the in_device structure */    
203     struct in_device __rcu    *ip_ptr;
204     struct dn_dev __rcu     *dn_ptr;
205     struct inet6_dev __rcu    *ip6_ptr;
206     void            *ax25_ptr;
207     struct wireless_dev    *ieee80211_ptr;
208     struct wpan_dev        *ieee802154_ptr;
209 #if IS_ENABLED(CONFIG_MPLS_ROUTING)
210     struct mpls_dev __rcu    *mpls_ptr;
211 #endif
212 
213 /*
214  * Cache lines mostly used on receive path (including eth_type_trans())
215  */
216     /* Interface address info used in eth_type_trans() */
217     unsigned char        *dev_addr;
218 
219 #ifdef CONFIG_SYSFS
220     /* Receive queues */
221     struct netdev_rx_queue    *_rx;
222 
223     /* Number of receive queues */
224     unsigned int        num_rx_queues;
225     unsigned int        real_num_rx_queues;
226 #endif
227 
228     struct bpf_prog __rcu    *xdp_prog;
229     unsigned long        gro_flush_timeout;
230 
231     /* Receive callback, used by e.g. bridges */
232     rx_handler_func_t __rcu    *rx_handler;
233     /* Argument passed to the callback */
234     void __rcu        *rx_handler_data;
235 
236 #ifdef CONFIG_NET_CLS_ACT
237     struct tcf_proto __rcu  *ingress_cl_list;
238 #endif
239     struct netdev_queue __rcu *ingress_queue;
240 #ifdef CONFIG_NETFILTER_INGRESS
241     /* netfilter ingress hook entry */
242     struct nf_hook_entry __rcu *nf_hooks_ingress;
243 #endif
244 
245     /* Link-layer broadcast address */
246     unsigned char        broadcast[MAX_ADDR_LEN];
247 #ifdef CONFIG_RFS_ACCEL
248     struct cpu_rmap        *rx_cpu_rmap;
249 #endif
250     /* Node in the interface-index hash list */
251     struct hlist_node    index_hlist;
252 
253 /*
254  * Cache lines mostly used on transmit path
255  */
256      /* Transmit queues */
257     struct netdev_queue    *_tx ____cacheline_aligned_in_smp;
258     /* Number of transmit queues */
259     unsigned int        num_tx_queues;
260     unsigned int        real_num_tx_queues;
261     /* Queueing discipline */
262     struct Qdisc        *qdisc;
263 #ifdef CONFIG_NET_SCHED
264     DECLARE_HASHTABLE    (qdisc_hash, 4);
265 #endif
266     /* 
267         Maximum number of packets that can be queued on the device's transmit queue
268     */
269     unsigned long        tx_queue_len;
270     spinlock_t        tx_global_lock;
271 
272     /*     Minimum time before the network layer decides a transmission has timed out
273         and calls the driver's tx_timeout hook
274     */
275     int            watchdog_timeo;
276 
277 #ifdef CONFIG_XPS
278     struct xps_dev_maps __rcu *xps_maps;
279 #endif
280 #ifdef CONFIG_NET_CLS_ACT
281     struct tcf_proto __rcu  *egress_cl_list;
282 #endif
283 
284     /* These may be needed for future network-power-down code. */
285     /* Watchdog timer */
286     struct timer_list    watchdog_timer;
287 
288     /* Reference count */
289     int __percpu        *pcpu_refcnt;
290 
291     /*     Network device registration and unregistration each happen in two steps;
292         this field is used to handle the second step
293     */
294     struct list_head    todo_list;
295 
296     struct list_head    link_watch_list;
297 
298     /* Registration state of the device */
299     enum { NETREG_UNINITIALIZED=0,
300            NETREG_REGISTERED,    /* completed register_netdevice */
301            NETREG_UNREGISTERING,    /* called unregister_netdevice */
302            NETREG_UNREGISTERED,    /* completed unregister todo */
303            NETREG_RELEASED,        /* called free_netdev */
304            NETREG_DUMMY,        /* dummy device for NAPI poll */
305     } reg_state:8;
306 
307     /* Marks that the device is about to be freed */
308     bool dismantle;
309 
310     enum {
311         RTNL_LINK_INITIALIZED,
312         RTNL_LINK_INITIALIZING,
313     } rtnl_link_state:16;
314 
315     bool needs_free_netdev;
316     void (*priv_destructor)(struct net_device *dev);
317 
318 #ifdef CONFIG_NETPOLL
319     struct netpoll_info __rcu    *npinfo;
320 #endif
321 
322     possible_net_t            nd_net;
323 
324     /* mid-layer private */
325     union {
326         void                    *ml_priv;
327         struct pcpu_lstats __percpu        *lstats;
328         struct pcpu_sw_netstats __percpu    *tstats;
329         struct pcpu_dstats __percpu        *dstats;
330         struct pcpu_vstats __percpu        *vstats;
331     };
332 
333 #if IS_ENABLED(CONFIG_GARP)
334     struct garp_port __rcu    *garp_port;
335 #endif
336 #if IS_ENABLED(CONFIG_MRP)
337     struct mrp_port __rcu    *mrp_port;
338 #endif
339 
340     struct device        dev;
341     const struct attribute_group *sysfs_groups[4];
342     const struct attribute_group *sysfs_rx_queue_group;
343 
344     const struct rtnl_link_ops *rtnl_link_ops;
345 
346     /* for setting kernel sock attribute on TCP connection setup */
347 #define GSO_MAX_SIZE        65536
348     unsigned int        gso_max_size;
349 #define GSO_MAX_SEGS        65535
350     u16            gso_max_segs;
351 
352 #ifdef CONFIG_DCB
353     const struct dcbnl_rtnl_ops *dcbnl_ops;
354 #endif
355     u8            num_tc;
356     struct netdev_tc_txq    tc_to_txq[TC_MAX_QUEUE];
357     u8            prio_tc_map[TC_BITMASK + 1];
358 
359 #if IS_ENABLED(CONFIG_FCOE)
360     unsigned int        fcoe_ddp_xid;
361 #endif
362 #if IS_ENABLED(CONFIG_CGROUP_NET_PRIO)
363     struct netprio_map __rcu *priomap;
364 #endif
365     struct phy_device    *phydev;
366     struct lock_class_key    *qdisc_tx_busylock;
367     struct lock_class_key    *qdisc_running_key;
368     bool            proto_down;
369 };

 

上述 net_device 结构中的 netdev_ops 成员指向设备操作函数表(struct net_device_ops),用于完成设备的初始化、销毁、开启、关闭以及修改某些变量值等操作。结构定义如下;其中各函数的具体含义暂未逐一注释,后续阅读代码遇到具体实现时再具体分析。

  1 struct net_device_ops {    /* Table of per-device operation callbacks; pointed to by net_device.netdev_ops */
  2     int            (*ndo_init)(struct net_device *dev);    /* lifecycle hooks: init/uninit, open/stop */
  3     void            (*ndo_uninit)(struct net_device *dev);
  4     int            (*ndo_open)(struct net_device *dev);
  5     int            (*ndo_stop)(struct net_device *dev);
  6     netdev_tx_t        (*ndo_start_xmit)(struct sk_buff *skb,    /* transmit path: hand an skb to the device */
  7                           struct net_device *dev);
  8     netdev_features_t    (*ndo_features_check)(struct sk_buff *skb,
  9                               struct net_device *dev,
 10                               netdev_features_t features);
 11     u16            (*ndo_select_queue)(struct net_device *dev,
 12                             struct sk_buff *skb,
 13                             void *accel_priv,
 14                             select_queue_fallback_t fallback);
 15     void            (*ndo_change_rx_flags)(struct net_device *dev,    /* receive-mode and address configuration */
 16                                int flags);
 17     void            (*ndo_set_rx_mode)(struct net_device *dev);
 18     int            (*ndo_set_mac_address)(struct net_device *dev,
 19                                void *addr);
 20     int            (*ndo_validate_addr)(struct net_device *dev);
 21     int            (*ndo_do_ioctl)(struct net_device *dev,
 22                             struct ifreq *ifr, int cmd);
 23     int            (*ndo_set_config)(struct net_device *dev,
 24                               struct ifmap *map);
 25     int            (*ndo_change_mtu)(struct net_device *dev,
 26                           int new_mtu);
 27     int            (*ndo_neigh_setup)(struct net_device *dev,
 28                            struct neigh_parms *);
 29     void            (*ndo_tx_timeout) (struct net_device *dev);
 30     /* statistics retrieval */
 31     void            (*ndo_get_stats64)(struct net_device *dev,
 32                            struct rtnl_link_stats64 *storage);
 33     bool            (*ndo_has_offload_stats)(const struct net_device *dev, int attr_id);
 34     int            (*ndo_get_offload_stats)(int attr_id,
 35                              const struct net_device *dev,
 36                              void *attr_data);
 37     struct net_device_stats* (*ndo_get_stats)(struct net_device *dev);
 38     /* add/remove a VLAN id (proto, vid); presumably for RX VLAN filtering - confirm */
 39     int            (*ndo_vlan_rx_add_vid)(struct net_device *dev,
 40                                __be16 proto, u16 vid);
 41     int            (*ndo_vlan_rx_kill_vid)(struct net_device *dev,
 42                                 __be16 proto, u16 vid);
 43 #ifdef CONFIG_NET_POLL_CONTROLLER
 44     void                    (*ndo_poll_controller)(struct net_device *dev);
 45     int            (*ndo_netpoll_setup)(struct net_device *dev,
 46                              struct netpoll_info *info);
 47     void            (*ndo_netpoll_cleanup)(struct net_device *dev);
 48 #endif
 49     int            (*ndo_set_vf_mac)(struct net_device *dev,    /* ndo_*_vf_* hooks: per-VF settings and queries (VF presumably = SR-IOV virtual function) */
 50                           int queue, u8 *mac);
 51     int            (*ndo_set_vf_vlan)(struct net_device *dev,
 52                            int queue, u16 vlan,
 53                            u8 qos, __be16 proto);
 54     int            (*ndo_set_vf_rate)(struct net_device *dev,
 55                            int vf, int min_tx_rate,
 56                            int max_tx_rate);
 57     int            (*ndo_set_vf_spoofchk)(struct net_device *dev,
 58                                int vf, bool setting);
 59     int            (*ndo_set_vf_trust)(struct net_device *dev,
 60                             int vf, bool setting);
 61     int            (*ndo_get_vf_config)(struct net_device *dev,
 62                              int vf,
 63                              struct ifla_vf_info *ivf);
 64     int            (*ndo_set_vf_link_state)(struct net_device *dev,
 65                              int vf, int link_state);
 66     int            (*ndo_get_vf_stats)(struct net_device *dev,
 67                             int vf,
 68                             struct ifla_vf_stats
 69                             *vf_stats);
 70     int            (*ndo_set_vf_port)(struct net_device *dev,
 71                            int vf,
 72                            struct nlattr *port[]);
 73     int            (*ndo_get_vf_port)(struct net_device *dev,
 74                            int vf, struct sk_buff *skb);
 75     int            (*ndo_set_vf_guid)(struct net_device *dev,
 76                            int vf, u64 guid,
 77                            int guid_type);
 78     int            (*ndo_set_vf_rss_query_en)(
 79                            struct net_device *dev,
 80                            int vf, bool setting);
 81     int            (*ndo_setup_tc)(struct net_device *dev,    /* traffic-class setup */
 82                         u32 handle,
 83                         __be16 protocol,
 84                         struct tc_to_netdev *tc);
 85 #if IS_ENABLED(CONFIG_FCOE)
 86     int            (*ndo_fcoe_enable)(struct net_device *dev);
 87     int            (*ndo_fcoe_disable)(struct net_device *dev);
 88     int            (*ndo_fcoe_ddp_setup)(struct net_device *dev,
 89                               u16 xid,
 90                               struct scatterlist *sgl,
 91                               unsigned int sgc);
 92     int            (*ndo_fcoe_ddp_done)(struct net_device *dev,
 93                              u16 xid);
 94     int            (*ndo_fcoe_ddp_target)(struct net_device *dev,
 95                                u16 xid,
 96                                struct scatterlist *sgl,
 97                                unsigned int sgc);
 98     int            (*ndo_fcoe_get_hbainfo)(struct net_device *dev,
 99                             struct netdev_fcoe_hbainfo *hbainfo);
100 #endif
101     /* FCoE WWN query; type is presumably NETDEV_FCOE_WWNN or NETDEV_FCOE_WWPN - confirm */
102 #if IS_ENABLED(CONFIG_LIBFCOE)
103 #define NETDEV_FCOE_WWNN 0
104 #define NETDEV_FCOE_WWPN 1
105     int            (*ndo_fcoe_get_wwn)(struct net_device *dev,
106                             u64 *wwn, int type);
107 #endif
108     /* receive flow steering hook, only present with CONFIG_RFS_ACCEL */
109 #ifdef CONFIG_RFS_ACCEL
110     int            (*ndo_rx_flow_steer)(struct net_device *dev,
111                              const struct sk_buff *skb,
112                              u16 rxq_index,
113                              u32 flow_id);
114 #endif
115     int            (*ndo_add_slave)(struct net_device *dev,    /* attach/detach slave_dev to/from dev */
116                          struct net_device *slave_dev);
117     int            (*ndo_del_slave)(struct net_device *dev,
118                          struct net_device *slave_dev);
119     netdev_features_t    (*ndo_fix_features)(struct net_device *dev,    /* feature-set negotiation (netdev_features_t) */
120                             netdev_features_t features);
121     int            (*ndo_set_features)(struct net_device *dev,
122                             netdev_features_t features);
123     int            (*ndo_neigh_construct)(struct net_device *dev,
124                                struct neighbour *n);
125     void            (*ndo_neigh_destroy)(struct net_device *dev,
126                              struct neighbour *n);
127     /* forwarding database (fdb) entries: add / delete / dump */
128     int            (*ndo_fdb_add)(struct ndmsg *ndm,
129                            struct nlattr *tb[],
130                            struct net_device *dev,
131                            const unsigned char *addr,
132                            u16 vid,
133                            u16 flags);
134     int            (*ndo_fdb_del)(struct ndmsg *ndm,
135                            struct nlattr *tb[],
136                            struct net_device *dev,
137                            const unsigned char *addr,
138                            u16 vid);
139     int            (*ndo_fdb_dump)(struct sk_buff *skb,
140                         struct netlink_callback *cb,
141                         struct net_device *dev,
142                         struct net_device *filter_dev,
143                         int *idx);
144     /* bridge link attributes: set / get / delete */
145     int            (*ndo_bridge_setlink)(struct net_device *dev,
146                               struct nlmsghdr *nlh,
147                               u16 flags);
148     int            (*ndo_bridge_getlink)(struct sk_buff *skb,
149                               u32 pid, u32 seq,
150                               struct net_device *dev,
151                               u32 filter_mask,
152                               int nlflags);
153     int            (*ndo_bridge_dellink)(struct net_device *dev,
154                               struct nlmsghdr *nlh,
155                               u16 flags);
156     int            (*ndo_change_carrier)(struct net_device *dev,
157                               bool new_carrier);
158     int            (*ndo_get_phys_port_id)(struct net_device *dev,
159                             struct netdev_phys_item_id *ppid);
160     int            (*ndo_get_phys_port_name)(struct net_device *dev,
161                               char *name, size_t len);
162     void            (*ndo_udp_tunnel_add)(struct net_device *dev,
163                               struct udp_tunnel_info *ti);
164     void            (*ndo_udp_tunnel_del)(struct net_device *dev,
165                               struct udp_tunnel_info *ti);
166     void*            (*ndo_dfwd_add_station)(struct net_device *pdev,    /* returns an opaque pointer; presumably the priv later passed to the del/xmit hooks - confirm */
167                             struct net_device *dev);
168     void            (*ndo_dfwd_del_station)(struct net_device *pdev,
169                             void *priv);
170 
171     netdev_tx_t        (*ndo_dfwd_start_xmit) (struct sk_buff *skb,
172                             struct net_device *dev,
173                             void *priv);
174     int            (*ndo_get_lock_subclass)(struct net_device *dev);
175     int            (*ndo_set_tx_maxrate)(struct net_device *dev,
176                               int queue_index,
177                               u32 maxrate);
178     int            (*ndo_get_iflink)(const struct net_device *dev);
179     int            (*ndo_change_proto_down)(struct net_device *dev,
180                              bool proto_down);
181     int            (*ndo_fill_metadata_dst)(struct net_device *dev,
182                                struct sk_buff *skb);
183     void            (*ndo_set_rx_headroom)(struct net_device *dev,
184                                int needed_headroom);
185     int            (*ndo_xdp)(struct net_device *dev,    /* XDP command entry point; request is described by struct netdev_xdp */
186                        struct netdev_xdp *xdp);
187 };

 

上述 net_device 结构中的 header_ops 成员用于链路层头部操作,邻居子系统在发送数据包时会用到该结构的成员函数。以以太网的实现为例:通过 cache 函数将以太头缓存到邻居子系统的 hh 中,数据包发送前直接拷贝缓存的以太头即可,无需重新组装。

 1 struct header_ops {    /* Link-layer header callbacks; pointed to by net_device.header_ops */
 2     int    (*create) (struct sk_buff *skb, struct net_device *dev,    /* presumably builds the link-layer header on skb from type/daddr/saddr - confirm */
 3                unsigned short type, const void *daddr,
 4                const void *saddr, unsigned int len);
 5     int    (*parse)(const struct sk_buff *skb, unsigned char *haddr);    /* presumably extracts a hardware address from skb into haddr - confirm */
 6     int    (*cache)(const struct neighbour *neigh, struct hh_cache *hh, __be16 type);    /* caches the link-layer header into hh so it can be copied at transmit time instead of rebuilt */
 7     void    (*cache_update)(struct hh_cache *hh,    /* presumably refreshes a cached header with a new hardware address - confirm */
 8                 const struct net_device *dev,
 9                 const unsigned char *haddr);
10     bool    (*validate)(const char *ll_header, unsigned int len);    /* checks a link-layer header of len bytes; exact criteria are defined by each implementation */
11 };

 

posted @ 2017-09-15 14:00  AlexAlex  阅读(9419)  评论(0编辑  收藏  举报