代码改变世界

八 ofproto--upcall处理线程

2017-03-24 17:39  yrpapa  阅读(995)  评论(0)    收藏  举报

一 ofproto层通过ofproto_class类(实现是ofproto_dpif_class)实现了openflow的接口,它主要包括如下几个接口类对象:

  • ofproto 代表了一个openflow switch的具体实现,是ofproto层的整体结构体;
  • ofport代表了一个openflow switch的端口,关联一个netdev设备;
  • ofrule代表了一条openflow规则,rule里面包含一组actions;
  • ofgroup代表了一个openflow的行为组合(group),OpenFlow 1.1及以上版本支持;

 

实现类中的up成员代表父类,利用CONTAINER_OF宏可以用up获得实现类对象

ofproto创建流程

1 初始化ofproto

/* Collects per-interface hints from 'cfg' and passes them to ofproto_init().
 *
 * For every interface of every port of every bridge in 'cfg', one
 * 'struct iface_hint' is stored under the interface name.  It records the
 * bridge name, the bridge's datapath type and the OpenFlow port number
 * returned by iface_pick_ofport().  The hint only borrows the name and type
 * pointers from the configuration records; it does not copy them.
 *
 * 'cfg' may be NULL, in which case ofproto_init() receives an empty table.
 *
 * A function-local static flag turns every call after the first into a
 * no-op. */
static void
bridge_init_ofproto(const struct ovsrec_open_vswitch *cfg)
{
    struct shash iface_hints;
    static bool initialized = false;
    int i;

    if (initialized) {
        return;
    }

    shash_init(&iface_hints);

    if (cfg) {
        for (i = 0; i < cfg->n_bridges; i++) {
            const struct ovsrec_bridge *br_cfg = cfg->bridges[i];
            int j;

            for (j = 0; j < br_cfg->n_ports; j++) {
                struct ovsrec_port *port_cfg = br_cfg->ports[j];
                int k;

                for (k = 0; k < port_cfg->n_interfaces; k++) {
                    struct ovsrec_interface *if_cfg = port_cfg->interfaces[k];
                    struct iface_hint *iface_hint;

                    iface_hint = xmalloc(sizeof *iface_hint);
                    iface_hint->br_name = br_cfg->name;
                    iface_hint->br_type = br_cfg->datapath_type;
                    iface_hint->ofp_port = iface_pick_ofport(if_cfg);

                    shash_add(&iface_hints, if_cfg->name, iface_hint);
                }
            }
        }
    }

    /* Initialize the ofproto layer.  This must run even when 'cfg' is NULL,
     * so it sits outside the 'if (cfg)' block above. */
    ofproto_init(&iface_hints);

    shash_destroy_free_data(&iface_hints);
    initialized = true;
}
ofproto/ofproto-dpif.c
const
struct ofproto_class ofproto_dpif_class = { init, enumerate_types, enumerate_names, del, port_open_type, type_run, type_wait, alloc, construct, destruct, dealloc, run, ofproto_dpif_wait, NULL, /* get_memory_usage. */ type_get_memory_usage, flush, query_tables, set_tables_version, port_alloc, port_construct, port_destruct, port_dealloc, port_modified, port_reconfigured, port_query_by_name, port_add, port_del, port_set_config, port_get_stats, port_dump_start, port_dump_next, port_dump_done, port_poll, port_poll_wait, port_is_lacp_current, port_get_lacp_stats, NULL, /* rule_choose_table */ rule_alloc, rule_construct, rule_insert, NULL, /* rule_delete */ rule_destruct, rule_dealloc, rule_get_stats, rule_execute, set_frag_handling, packet_out, nxt_resume, set_netflow, get_netflow_ids, set_sflow, set_ipfix, get_ipfix_stats, set_cfm, cfm_status_changed, get_cfm_status, set_lldp, get_lldp_status, set_aa, aa_mapping_set, aa_mapping_unset, aa_vlan_get_queued, aa_vlan_get_queue_size, set_bfd, bfd_status_changed, get_bfd_status, set_stp, get_stp_status, set_stp_port, get_stp_port_status, get_stp_port_stats, set_rstp, get_rstp_status, set_rstp_port, get_rstp_port_status, set_queues, bundle_set, bundle_remove, mirror_set__, mirror_get_stats__, set_flood_vlans, is_mirror_output_bundle, forward_bpdu_changed, set_mac_table_config, set_mcast_snooping, set_mcast_snooping_port, NULL, /* meter_get_features */ NULL, /* meter_set */ NULL, /* meter_get */ NULL, /* meter_del */ group_alloc, /* group_alloc */ group_construct, /* group_construct */ group_destruct, /* group_destruct */ group_dealloc, /* group_dealloc */ NULL, /* group_modify */ group_get_stats, /* group_get_stats */ get_datapath_version, /* get_datapath_version */ ct_flush, /* ct_flush */ };

 

/* One-time setup of the ofproto layer.
 *
 * Registers the ofproto-dpif provider, copies every hint in 'iface_hints'
 * into the file-level 'init_ofp_ports' table (duplicating the strings, with
 * the bridge type passed through ofproto_normalize_type()), then calls the
 * init() hook of every registered provider with that table, and finally
 * calls ofproto_unixctl_init(). */
void
ofproto_init(const struct shash *iface_hints)
{
    struct shash_node *hint_node;
    size_t idx;

    /* Register the ofproto-dpif implementation. */
    ofproto_class_register(&ofproto_dpif_class);

    /* The caller keeps ownership of the entries in 'iface_hints', so keep
     * private duplicates. */
    SHASH_FOR_EACH (hint_node, iface_hints) {
        const struct iface_hint *src = hint_node->data;
        struct iface_hint *copy = xmalloc(sizeof *copy);

        copy->br_name = xstrdup(src->br_name);
        copy->br_type = xstrdup(ofproto_normalize_type(src->br_type));
        copy->ofp_port = src->ofp_port;

        shash_add(&init_ofp_ports, hint_node->name, copy);
    }

    /* Give every registered provider a chance to initialize. */
    for (idx = 0; idx < n_ofproto_classes; idx++) {
        ofproto_classes[idx]->init(&init_ofp_ports);
    }

    ofproto_unixctl_init();
}
/* Appends 'new_class' to the 'ofproto_classes' array.
 *
 * Returns 0 on success, or EEXIST if the same pointer is already present in
 * the array. */
int
ofproto_class_register(const struct ofproto_class *new_class)
{
    size_t slot = 0;

    /* Reject a class that has been registered before. */
    while (slot < n_ofproto_classes) {
        if (ofproto_classes[slot++] == new_class) {
            return EEXIST;
        }
    }

    /* Grow the array when it is full (per the original note, x2nrealloc()
     * doubles the allocation). */
    if (n_ofproto_classes >= allocated_ofproto_classes) {
        ofproto_classes = x2nrealloc(ofproto_classes,
                                     &allocated_ofproto_classes,
                                     sizeof *ofproto_classes);
    }

    ofproto_classes[n_ofproto_classes] = new_class;
    n_ofproto_classes++;
    return 0;
}
/* Provider init hook: duplicates every hint in 'iface_hints' into the
 * 'init_ofp_ports' table, then calls ofproto_unixctl_init() and
 * udpif_init(). */
static void
init(const struct shash *iface_hints)
{
    struct shash_node *hint_node;

    /* The caller keeps ownership of the entries in 'iface_hints', so keep
     * private duplicates. */
    SHASH_FOR_EACH (hint_node, iface_hints) {
        const struct iface_hint *src = hint_node->data;
        struct iface_hint *copy = xmalloc(sizeof *copy);

        copy->ofp_port = src->ofp_port;
        copy->br_name = xstrdup(src->br_name);
        copy->br_type = xstrdup(src->br_type);

        shash_add(&init_ofp_ports, hint_node->name, copy);
    }

    ofproto_unixctl_init();
    udpif_init();
}

2 创建ofproto

/* Applies the configuration in 'ovs_cfg'.
 *
 * The work is done in the following order:
 *
 *   1. Global ofproto settings (flow limit, max idle, CPU mask, handler and
 *      revalidator thread counts) are read from 'other_config'.
 *
 *   2. The bridge/port/iface data structures are updated.
 *
 *   3. Deletions are pushed down to ofproto, then missing ofprotos and ports
 *      are created.
 *
 *   4. Every bridge, port and interface gets its detailed configuration.
 *
 *   5. bridge_run__() is called once before returning. */
static void 
bridge_reconfigure(const struct ovsrec_open_vswitch *ovs_cfg)
{
    struct sockaddr_in *managers;
    struct bridge *br, *next;
    int sflow_bridge_number;
    size_t n_managers;

    COVERAGE_INC(bridge_reconfigure);

    ofproto_set_flow_limit(smap_get_int(&ovs_cfg->other_config, "flow-limit",
                                        OFPROTO_FLOW_LIMIT_DEFAULT));
    ofproto_set_max_idle(smap_get_int(&ovs_cfg->other_config, "max-idle",
                                      OFPROTO_MAX_IDLE_DEFAULT));
    ofproto_set_cpu_mask(smap_get(&ovs_cfg->other_config, "pmd-cpu-mask"));

    ofproto_set_threads(
        smap_get_int(&ovs_cfg->other_config, "n-handler-threads", 0),
        smap_get_int(&ovs_cfg->other_config, "n-revalidator-threads", 0)); 

    /* Destroy "struct bridge"s, "struct port"s, and "struct iface"s according
     * to 'ovs_cfg', with only very minimal configuration otherwise.
     *
     * This is mostly an update to bridge data structures. Nothing is pushed
     * down to ofproto or lower layers. */
    add_del_bridges(ovs_cfg);
    HMAP_FOR_EACH (br, node, &all_bridges) {
        bridge_collect_wanted_ports(br, &br->wanted_ports);
        bridge_del_ports(br, &br->wanted_ports);
    }    

    /* Start pushing configuration changes down to the ofproto layer:
     *
     *   - Delete ofprotos that are no longer configured.
     *
     *   - Delete ports that are no longer configured.
     *
     *   - Reconfigure existing ports to their desired configurations, or
     *     delete them if not possible.
     *
     * We have to do all the deletions before we can do any additions, because
     * the ports to be added might require resources that will be freed up by
     * deletions (they might especially overlap in name). */
    bridge_delete_ofprotos();
    HMAP_FOR_EACH (br, node, &all_bridges) {
        if (br->ofproto) {
            bridge_delete_or_reconfigure_ports(br);
        }
    }

    /* Finish pushing configuration changes to the ofproto layer:
     *
     *     - Create ofprotos that are missing.
     *
     *     - Add ports that are missing. */
    HMAP_FOR_EACH_SAFE (br, next, node, &all_bridges) {
        if (!br->ofproto) {
            int error;

            /* Create the ofproto for this bridge. */
            error = ofproto_create(br->name, br->type, &br->ofproto);
            if (error) {
                VLOG_ERR("failed to create bridge %s: %s", br->name,
                         ovs_strerror(error));
                /* The bridge is destroyed on failure, which is why this loop
                 * uses the _SAFE iterator. */
                shash_destroy(&br->wanted_ports);
                bridge_destroy(br, true);
            } else {
                /* Trigger storing datapath version. */
                seq_change(connectivity_seq_get());
            }
        }
    }
    HMAP_FOR_EACH (br, node, &all_bridges) {
        bridge_add_ports(br, &br->wanted_ports);
        shash_destroy(&br->wanted_ports);
    }

    reconfigure_system_stats(ovs_cfg);

    /* Complete the configuration. */
    sflow_bridge_number = 0;
    collect_in_band_managers(ovs_cfg, &managers, &n_managers);
    HMAP_FOR_EACH (br, node, &all_bridges) {
        struct port *port;

        /* We need the datapath ID early to allow LACP ports to use it as the
         * default system ID. */
        bridge_configure_datapath_id(br);

        HMAP_FOR_EACH (port, hmap_node, &br->ports) {
            struct iface *iface;

            port_configure(port);

            LIST_FOR_EACH (iface, port_elem, &port->ifaces) {
                iface_set_ofport(iface->cfg, iface->ofp_port);
                /* Clear eventual previous errors */
                ovsrec_interface_set_error(iface->cfg, NULL);
                iface_configure_cfm(iface);
                iface_configure_qos(iface, port->cfg->qos);
                iface_set_mac(br, port, iface);
                ofproto_port_set_bfd(br->ofproto, iface->ofp_port,
                                     &iface->cfg->bfd);
                ofproto_port_set_lldp(br->ofproto, iface->ofp_port,
                                      &iface->cfg->lldp);
                ofproto_port_set_config(br->ofproto, iface->ofp_port,
                                        &iface->cfg->other_config);
            }
        }
        bridge_configure_mirrors(br);
        bridge_configure_forward_bpdu(br);
        bridge_configure_mac_table(br);
        bridge_configure_mcast_snooping(br);
        bridge_configure_remotes(br, managers, n_managers);
        bridge_configure_netflow(br);
        bridge_configure_sflow(br, &sflow_bridge_number);
        bridge_configure_ipfix(br);
        bridge_configure_spanning_tree(br);
        bridge_configure_tables(br);
        bridge_configure_dp_desc(br);
        bridge_configure_aa(br);
    }
    free(managers);

    /* The ofproto-dpif provider does some final reconfiguration in its
     * ->type_run() function.  We have to call it before notifying the database
     * client that reconfiguration is complete, otherwise there is a very
     * narrow race window in which e.g. ofproto/trace will not recognize the
     * new configuration (sometimes this causes unit test failures). */
    bridge_run__();
}

 

/* Creates an ofproto named 'datapath_name' of type 'datapath_type'.
 *
 * On success, returns 0 and stores the new object in '*ofprotop'.
 *
 * On failure, '*ofprotop' is NULL and the return value is a positive errno
 * value:
 *
 *   - EAFNOSUPPORT if no registered class handles the normalized type.
 *
 *   - ENOMEM if the class's alloc() hook returns NULL.
 *
 *   - Whatever the class's construct() hook returned, if it failed. */
int
ofproto_create(const char *datapath_name, const char *datapath_type,
               struct ofproto **ofprotop)
    OVS_EXCLUDED(ofproto_mutex)
{
    const struct ofproto_class *class;
    struct ofproto *ofproto;
    int error;
    int i;

    *ofprotop = NULL;

    datapath_type = ofproto_normalize_type(datapath_type);
    /* Look up the provider class implementing this type.  ofproto_init()
     * registers ofproto_dpif_class. */
    class = ofproto_class_find__(datapath_type);
    if (!class) {
        VLOG_WARN("could not create datapath %s of unknown type %s",
                  datapath_name, datapath_type);
        return EAFNOSUPPORT;
    }    

    ofproto = class->alloc();
    if (!ofproto) {
        VLOG_ERR("failed to allocate datapath %s of type %s",
                 datapath_name, datapath_type);
        return ENOMEM;
    }    

    /* Initialize. */
    ovs_mutex_lock(&ofproto_mutex);
    /* Only the generic 'struct ofproto' part is cleared here. */
    memset(ofproto, 0, sizeof *ofproto);
    ofproto->ofproto_class = class;
    ofproto->name = xstrdup(datapath_name);
    ofproto->type = xstrdup(datapath_type);
    hmap_insert(&all_ofprotos, &ofproto->hmap_node,
                hash_string(ofproto->name, 0));
    ofproto->datapath_id = 0;
    ofproto->forward_bpdu = false;
    ofproto->fallback_dpid = pick_fallback_dpid();
    ofproto->mfr_desc = NULL;
    ofproto->hw_desc = NULL;
    ofproto->sw_desc = NULL;
    ofproto->serial_desc = NULL;
    ofproto->dp_desc = NULL;
    ofproto->frag_handling = OFPUTIL_FRAG_NORMAL;
    hmap_init(&ofproto->ports);
    hmap_init(&ofproto->ofport_usage);
    shash_init(&ofproto->port_by_name);
    simap_init(&ofproto->ofp_requests);
    ofproto->max_ports = ofp_to_u16(OFPP_MAX);
    ofproto->eviction_group_timer = LLONG_MIN;
    ofproto->tables = NULL;
    ofproto->n_tables = 0;
    ofproto->tables_version = OVS_VERSION_MIN;
    hindex_init(&ofproto->cookies);
    hmap_init(&ofproto->learned_cookies);
    ovs_list_init(&ofproto->expirable);
    ofproto->connmgr = connmgr_create(ofproto, datapath_name, datapath_name);
    guarded_list_init(&ofproto->rule_executes);
    ofproto->min_mtu = INT_MAX;
    cmap_init(&ofproto->groups);
    ovs_mutex_unlock(&ofproto_mutex);
    ofproto->ogf.types = 0xf;
    ofproto->ogf.capabilities = OFPGFC_CHAINING | OFPGFC_SELECT_LIVENESS |
                                OFPGFC_SELECT_WEIGHT;
    /* The same limits are applied to each of the four 'ogf' slots. */
    for (i = 0; i < 4; i++) {
        ofproto->ogf.max_groups[i] = OFPG_MAX;
        ofproto->ogf.ofpacts[i] = (UINT64_C(1) << N_OFPACTS) - 1;
    }
    tun_metadata_init();

    /* Let the provider class build its private state. */
    error = ofproto->ofproto_class->construct(ofproto);
    if (error) {
        VLOG_ERR("failed to open datapath %s: %s",
                 datapath_name, ovs_strerror(error));
        ovs_mutex_lock(&ofproto_mutex);
        connmgr_destroy(ofproto->connmgr);
        ofproto->connmgr = NULL;
        ovs_mutex_unlock(&ofproto_mutex);
        ofproto_destroy__(ofproto);
        return error;
    }

    /* Check that hidden tables, if any, are at the end. */
    ovs_assert(ofproto->n_tables);
    for (i = 0; i + 1 < ofproto->n_tables; i++) {
        enum oftable_flags flags = ofproto->tables[i].flags;
        enum oftable_flags next_flags = ofproto->tables[i + 1].flags;

        ovs_assert(!(flags & OFTABLE_HIDDEN) || next_flags & OFTABLE_HIDDEN);
    }

    ofproto->datapath_id = pick_datapath_id(ofproto);
    init_ports(ofproto);

    /* Initialize meters table. */
    if (ofproto->ofproto_class->meter_get_features) {
        ofproto->ofproto_class->meter_get_features(ofproto,
                                                   &ofproto->meter_features);
    } else {
        memset(&ofproto->meter_features, 0, sizeof ofproto->meter_features);
    }
    /* One extra slot is allocated beyond 'max_meters'. */
    ofproto->meters = xzalloc((ofproto->meter_features.max_meters + 1)
                              * sizeof(struct meter *));

    /* Set the initial tables version. */
    ofproto_bump_tables_version(ofproto);

    *ofprotop = ofproto;
    return 0;
}

 

static const struct ofproto_class *
ofproto_class_find__(const char *type)
{
    size_t i;

    for (i = 0; i < n_ofproto_classes; i++) {
        const struct ofproto_class *class = ofproto_classes[i];
        struct sset types;
        bool found;

        sset_init(&types);
        class->enumerate_types(&types);
        found = sset_contains(&types, type);
        sset_destroy(&types);

        if (found) {
            return class;
        }
    }
    VLOG_WARN("unknown datapath type %s", type);
    return NULL;
}

alloc

/* Provider alloc hook: allocates a zero-filled 'struct ofproto_dpif' and
 * returns a pointer to its embedded generic 'up' member. */
static struct ofproto *
alloc(void)
{
    struct ofproto_dpif *dpif_ofproto;

    dpif_ofproto = xzalloc(sizeof *dpif_ofproto);
    return &dpif_ofproto->up;
}

construct

static int
construct(struct ofproto *ofproto_)
{
    struct ofproto_dpif *ofproto = ofproto_dpif_cast(ofproto_);
    struct shash_node *node, *next;
    int error;

    /* Tunnel module can get used right after the udpif threads are running. */
    ofproto_tunnel_init();

    error = open_dpif_backer(ofproto->up.type, &ofproto->backer); // 调用udpif_set_threads,开启ofproto的upcall线程,接收内核态数据
  /*
    open_dpif_backer-->dpif_create_and_open:ovs-system-->dpif_create-->do_open-->dp_initialize
    注册dpif_netlink_class-->dpif_netlink_class.open-->dpif_netlink_open-->dpif_netlink_dp_transact-->nl_transact
  */
if (error) { return error; } uuid_generate(&ofproto->uuid); atomic_init(&ofproto->tables_version, OVS_VERSION_MIN); ofproto->netflow = NULL; ofproto->sflow = NULL; ofproto->ipfix = NULL; ofproto->stp = NULL; ofproto->rstp = NULL; ofproto->dump_seq = 0; hmap_init(&ofproto->bundles); ofproto->ml = mac_learning_create(MAC_ENTRY_DEFAULT_IDLE_TIME); // 创建mac学习表mac_learning结构 ofproto->ms = NULL; ofproto->mbridge = mbridge_create(); // mbridge是个啥??? ofproto->has_bonded_bundles = false; ofproto->lacp_enabled = false; ovs_mutex_init_adaptive(&ofproto->stats_mutex); guarded_list_init(&ofproto->ams); sset_init(&ofproto->ports); sset_init(&ofproto->ghost_ports); sset_init(&ofproto->port_poll_set); ofproto->port_poll_errno = 0; ofproto->change_seq = 0; ofproto->ams_seq = seq_create(); ofproto->ams_seqno = seq_read(ofproto->ams_seq); SHASH_FOR_EACH_SAFE (node, next, &init_ofp_ports) { struct iface_hint *iface_hint = node->data; if (!strcmp(iface_hint->br_name, ofproto->up.name)) { /* Check if the datapath already has this port. */ if (dpif_port_exists(ofproto->backer->dpif, node->name)) { sset_add(&ofproto->ports, node->name); } free(iface_hint->br_name); free(iface_hint->br_type); free(iface_hint); shash_delete(&init_ofp_ports, node); } } hmap_insert(&all_ofproto_dpifs, &ofproto->all_ofproto_dpifs_node, hash_string(ofproto->up.name, 0)); memset(&ofproto->stats, 0, sizeof ofproto->stats); ofproto_init_tables(ofproto_, N_TABLES); error = add_internal_flows(ofproto); // 添加初始流表 ofproto->up.tables[TBL_INTERNAL].flags = OFTABLE_HIDDEN | OFTABLE_READONLY; return error; }

初始流表

static int
add_internal_flows(struct ofproto_dpif *ofproto)
{
    struct ofpact_controller *controller;
    uint64_t ofpacts_stub[128 / 8];
    struct ofpbuf ofpacts;
    struct rule *unused_rulep OVS_UNUSED;
    struct match match;
    int error;
    int id;

    ofpbuf_use_stack(&ofpacts, ofpacts_stub, sizeof ofpacts_stub);
    id = 1;

    controller = ofpact_put_CONTROLLER(&ofpacts);
    controller->max_len = UINT16_MAX;
    controller->controller_id = 0;
    controller->reason = OFPR_IMPLICIT_MISS;
    ofpact_finish_CONTROLLER(&ofpacts, &controller);

    error = add_internal_miss_flow(ofproto, id++, &ofpacts,
                                   &ofproto->miss_rule);
    if (error) {
        return error;
    }

    ofpbuf_clear(&ofpacts);
    error = add_internal_miss_flow(ofproto, id++, &ofpacts,
                                   &ofproto->no_packet_in_rule);
    if (error) {
        return error;
    }

    error = add_internal_miss_flow(ofproto, id++, &ofpacts,
                                   &ofproto->drop_frags_rule);
    if (error) {
        return error;
    }

    /* Drop any run away non-recirc rule lookups. Recirc_id has to be
     * zero when reaching this rule.
     *
     * (priority=2), recirc_id=0, actions=drop
     */
    ofpbuf_clear(&ofpacts);
    match_init_catchall(&match);
    match_set_recirc_id(&match, 0);
    error = ofproto_dpif_add_internal_flow(ofproto, &match, 2, 0, &ofpacts,
                                           &unused_rulep);
    return error;
}
add_internal_miss_flow(ofproto, id++, &ofpacts, &ofproto->miss_rule) 
经过ofproto_flow_mod-->handle_flow_mod__-->ofproto_flow_mod_init-->add_flow_init-->ofproto_rule_create-->rule_construct
创建如下流表:
cookie=0x0, duration=2265.499s, table=0, n_packets=0, n_bytes=0, idle_age=2265, priority=0 actions=NORMAL

 

init_ports

static int
init_ports(struct ofproto *p)
{
    struct ofproto_port_dump dump;
    struct ofproto_port ofproto_port;
    struct shash_node *node, *next;

    OFPROTO_PORT_FOR_EACH (&ofproto_port, &dump, p) {
        const char *name = ofproto_port.name;

        if (shash_find(&p->port_by_name, name)) {
            VLOG_WARN_RL(&rl, "%s: ignoring duplicate device %s in datapath",
                         p->name, name);
        } else {
            struct ofputil_phy_port pp;
            struct netdev *netdev;

            /* Check if an OpenFlow port number had been requested. */
            node = shash_find(&init_ofp_ports, name);
            if (node) {
                const struct iface_hint *iface_hint = node->data;
                simap_put(&p->ofp_requests, name,
                          ofp_to_u16(iface_hint->ofp_port));
            }

            netdev = ofport_open(p, &ofproto_port, &pp);
            if (netdev) {
                ofport_install(p, netdev, &pp);
                if (ofp_to_u16(ofproto_port.ofp_port) < p->max_ports) {
                    p->alloc_port_no = MAX(p->alloc_port_no,
                                           ofp_to_u16(ofproto_port.ofp_port));
                }
            }
        }
    }

    SHASH_FOR_EACH_SAFE(node, next, &init_ofp_ports) {
        struct iface_hint *iface_hint = node->data;

        if (!strcmp(iface_hint->br_name, p->name)) {
            free(iface_hint->br_name);
            free(iface_hint->br_type);
            free(iface_hint);
            shash_delete(&init_ofp_ports, node);
        }
    }

    return 0;
}

 

static struct netdev *
ofport_open(struct ofproto *ofproto,
            struct ofproto_port *ofproto_port,
            struct ofputil_phy_port *pp)
{
    enum netdev_flags flags;
    struct netdev *netdev;
    int error;

    error = netdev_open(ofproto_port->name, ofproto_port->type, &netdev);
    if (error) {
        VLOG_WARN_RL(&rl, "%s: ignoring port %s (%"PRIu16") because netdev %s "
                     "cannot be opened (%s)",
                     ofproto->name,
                     ofproto_port->name, ofproto_port->ofp_port,
                     ofproto_port->name, ovs_strerror(error));
        return NULL;
    }

    if (ofproto_port->ofp_port == OFPP_NONE) {
        if (!strcmp(ofproto->name, ofproto_port->name)) {
            ofproto_port->ofp_port = OFPP_LOCAL;
        } else {
            ofproto_port->ofp_port = alloc_ofp_port(ofproto,
                                                    ofproto_port->name);
        }
    }
    pp->port_no = ofproto_port->ofp_port;
    netdev_get_etheraddr(netdev, &pp->hw_addr);
    ovs_strlcpy(pp->name, ofproto_port->name, sizeof pp->name);
    netdev_get_flags(netdev, &flags);
    pp->config = flags & NETDEV_UP ? 0 : OFPUTIL_PC_PORT_DOWN;
    pp->state = netdev_get_carrier(netdev) ? 0 : OFPUTIL_PS_LINK_DOWN;
    netdev_get_features(netdev, &pp->curr, &pp->advertised,
                        &pp->supported, &pp->peer);
    pp->curr_speed = netdev_features_to_bps(pp->curr, 0) / 1000;
    pp->max_speed = netdev_features_to_bps(pp->supported, 0) / 1000;

    return netdev;
}

 

/* Opens the network device 'name' of the given 'type'.
 *
 * If a netdev called 'name' is already in 'netdev_shash', it is reused and
 * 'type' is not consulted.  Otherwise the class registered for 'type' is
 * looked up ("system" when 'type' is NULL or empty), and a new netdev is
 * allocated, initialized and constructed by that class.
 *
 * On success, returns 0, increments the netdev's 'ref_cnt' and stores it in
 * '*netdevp'.
 *
 * On failure, stores NULL in '*netdevp' and returns a positive errno value:
 *
 *   - EAFNOSUPPORT if no class could be found or referenced.
 *
 *   - ENOMEM if the class's alloc() returned NULL.
 *
 *   - Whatever the class's construct() returned, if it failed. */
int
netdev_open(const char *name, const char *type, struct netdev **netdevp)
    OVS_EXCLUDED(netdev_mutex)
{
    struct netdev *netdev;
    int error;

    /* Make sure the built-in netdev classes are registered. */
    netdev_initialize();

    ovs_mutex_lock(&netdev_mutex);
    netdev = shash_find_data(&netdev_shash, name);
    if (!netdev) {
        struct netdev_registered_class *rc; 

        rc = netdev_lookup_class(type && type[0] ? type : "system");
        if (rc && ovs_refcount_try_ref_rcu(&rc->refcnt)) {
            netdev = rc->class->alloc();
            if (netdev) {
                /* Only the generic 'struct netdev' part is cleared here. */
                memset(netdev, 0, sizeof *netdev);
                netdev->netdev_class = rc->class;
                netdev->name = xstrdup(name);
                netdev->change_seq = 1; 
                netdev->reconfigure_seq = seq_create();
                netdev->last_reconfigure_seq =
                    seq_read(netdev->reconfigure_seq);
                netdev->node = shash_add(&netdev_shash, name, netdev);

                /* By default enable one tx and rx queue per netdev. */
                netdev->n_txq = netdev->netdev_class->send ? 1 : 0; 
                netdev->n_rxq = netdev->netdev_class->rxq_alloc ? 1 : 0; 

                ovs_list_init(&netdev->saved_flags_list);

                error = rc->class->construct(netdev);
                if (!error) {
                    netdev_change_seq_changed(netdev);
                } else {
                    /* Undo everything set up above, including the class
                     * reference taken before alloc(). */
                    ovs_refcount_unref(&rc->refcnt);
                    seq_destroy(netdev->reconfigure_seq);
                    free(netdev->name);
                    ovs_assert(ovs_list_is_empty(&netdev->saved_flags_list));
                    shash_delete(&netdev_shash, netdev->node);
                    rc->class->dealloc(netdev);
                }
            } else {
                error = ENOMEM;
            }
        } else {
            VLOG_WARN("could not create netdev %s of unknown type %s",
                      name, type);
            error = EAFNOSUPPORT;
        }
    } else {
        /* Existing netdev: nothing to create. */
        error = 0;
    }

    if (!error) {
        netdev->ref_cnt++;
        *netdevp = netdev;
    } else {
        *netdevp = NULL;
    }
    ovs_mutex_unlock(&netdev_mutex);

    return error;
}

注册netdevice

/* Registers the built-in netdev providers for the current platform.
 *
 * The body is guarded by an 'ovsthread_once', so the registrations are meant
 * to happen a single time no matter how often this function is called. */
static void
netdev_initialize(void)
    OVS_EXCLUDED(netdev_mutex)
{
    static struct ovsthread_once once = OVSTHREAD_ONCE_INITIALIZER;

    if (!ovsthread_once_start(&once)) {
        return;
    }

    fatal_signal_add_hook(restore_all_flags, NULL, NULL, true);

    netdev_vport_patch_register();

#ifdef __linux__
    netdev_register_provider(&netdev_linux_class);
    netdev_register_provider(&netdev_internal_class);
    netdev_register_provider(&netdev_tap_class);
    netdev_vport_tunnel_register();
#endif
#if defined(__FreeBSD__) || defined(__NetBSD__)
    netdev_register_provider(&netdev_tap_class);
    netdev_register_provider(&netdev_bsd_class);
#endif
#ifdef _WIN32
    netdev_register_provider(&netdev_windows_class);
    netdev_register_provider(&netdev_internal_class);
    netdev_vport_tunnel_register();
#endif

    ovsthread_once_done(&once);
}

 

以system为例

/* The netdev class registered under the type name "system".
 *
 * It is built with the NETDEV_LINUX_CLASS macro from the type name and four
 * functions.  Judging by their names these are the construct, get_stats,
 * get_features and get_status hooks; confirm against the macro
 * definition. */
const struct netdev_class netdev_linux_class =
    NETDEV_LINUX_CLASS(
        "system",
        netdev_linux_construct,
        netdev_linux_get_stats,
        netdev_linux_get_features,
        netdev_linux_get_status);

construct

/* Construct hook for Linux netdevs.
 *
 * Runs the common construction step and reads the interface flags.  Returns
 * ENODEV if the flags lookup reports ENODEV and the netdev is not of
 * 'netdev_internal_class'.  Returns 0 in every other case, including other
 * get_flags() errors. */
static int
netdev_linux_construct(struct netdev *netdev_)
{
    struct netdev_linux *linux_dev = netdev_linux_cast(netdev_);
    int flags_error;

    netdev_linux_common_construct(linux_dev);

    flags_error = get_flags(&linux_dev->up, &linux_dev->ifi_flags);

    /* "Internal" netdevs have to be created as netdev objects before they
     * exist in the kernel, because creating them in the kernel happens by
     * passing a netdev object to dpif_port_add().  For them a missing device
     * is therefore not an error.  For any other class, the device does not
     * exist, so don't allow it to be opened. */
    if (flags_error == ENODEV
        && linux_dev->up.netdev_class != &netdev_internal_class) {
        return ENODEV;
    }

    return 0;
}

ofport_install

static int
ofport_install(struct ofproto *p,
               struct netdev *netdev, const struct ofputil_phy_port *pp)
{
    const char *netdev_name = netdev_get_name(netdev);
    struct ofport *ofport;
    int error;

    /* Create ofport. */
    ofport = p->ofproto_class->port_alloc();
    if (!ofport) {
        error = ENOMEM;
        goto error;
    }
    ofport->ofproto = p;
    ofport->netdev = netdev;
    ofport->change_seq = netdev_get_change_seq(netdev);
    ofport->pp = *pp;
    ofport->ofp_port = pp->port_no;
    ofport->created = time_msec();

    /* Add port to 'p'. */
    hmap_insert(&p->ports, &ofport->hmap_node,
                hash_ofp_port(ofport->ofp_port));
    shash_add(&p->port_by_name, netdev_name, ofport);

    update_mtu(p, ofport);

    /* Let the ofproto_class initialize its private data. */
    error = p->ofproto_class->port_construct(ofport);
    if (error) {
        goto error;
    }
    connmgr_send_port_status(p->connmgr, NULL, pp, OFPPR_ADD);
    return 0;

error:
    VLOG_WARN_RL(&rl, "%s: could not add port %s (%s)",
                 p->name, netdev_name, ovs_strerror(error));
    if (ofport) {
        ofport_destroy__(ofport);
    } else {
        netdev_close(netdev);
    }
    return error;
}

 

port_alloc

/* Provider port_alloc hook: allocates a zero-filled 'struct ofport_dpif' and
 * returns a pointer to its embedded generic 'up' member. */
static struct ofport *
port_alloc(void)
{
    struct ofport_dpif *dpif_port;

    dpif_port = xzalloc(sizeof *dpif_port);
    return &dpif_port->up;
}

 

port_construct

/* Provider port_construct hook: initializes the ofproto-dpif specific part
 * of 'port_'.
 *
 * Resets the per-port state and marks the backer for revalidation.  Then:
 *
 *   - A patch port gets ODPP_NONE as its datapath port, has its peer
 *     updated, and construction stops there.
 *
 *   - Any other port is looked up in the datapath by name to obtain its
 *     datapath port number.  A tunnel port is registered with the tunnel
 *     module.  A non-tunnel port is inserted into the backer's
 *     odp-to-ofport map.
 *
 * Returns 0 on success.  Otherwise returns a positive errno value: the
 * error from dpif_port_query_by_name() or tnl_port_add(), or EBUSY if the
 * datapath port already maps to an OpenFlow port. */
static int
port_construct(struct ofport *port_)
{
    struct ofport_dpif *port = ofport_dpif_cast(port_);
    struct ofproto_dpif *ofproto = ofproto_dpif_cast(port->up.ofproto);
    const struct netdev *netdev = port->up.netdev;
    char namebuf[NETDEV_VPORT_NAME_BUFSIZE];
    const char *dp_port_name;
    struct dpif_port dpif_port;
    int error;

    ofproto->backer->need_revalidate = REV_RECONFIGURE;
    port->bundle = NULL;
    port->cfm = NULL;
    port->bfd = NULL;
    port->lldp = NULL;
    port->may_enable = false;
    port->stp_port = NULL;
    port->stp_state = STP_DISABLED;
    port->rstp_port = NULL;
    port->rstp_state = RSTP_DISABLED;
    port->is_tunnel = false;
    port->peer = NULL;
    port->qdscp = NULL;
    port->n_qdscp = 0;
    port->carrier_seq = netdev_get_carrier_resets(netdev);
    port->is_layer3 = netdev_vport_is_layer3(netdev);

    if (netdev_vport_is_patch(netdev)) {
        /* By bailing out here, we don't submit the port to the sFlow module
         * to be considered for counter polling export.  This is correct
         * because the patch port represents an interface that sFlow considers
         * to be "internal" to the switch as a whole, and therefore not a
         * candidate for counter polling. */
        port->odp_port = ODPP_NONE;
        ofport_update_peer(port);
        return 0;
    }

    dp_port_name = netdev_vport_get_dpif_port(netdev, namebuf, sizeof namebuf);
    error = dpif_port_query_by_name(ofproto->backer->dpif, dp_port_name,
                                    &dpif_port);
    if (error) {
        return error;
    }

    /* From here on 'dpif_port' is released with dpif_port_destroy() on
     * every exit path. */
    port->odp_port = dpif_port.port_no;

    if (netdev_get_tunnel_config(netdev)) {
        /* The tunnel count is incremented before tnl_port_add() and
         * decremented again if that call fails. */
        atomic_count_inc(&ofproto->backer->tnl_count);
        error = tnl_port_add(port, port->up.netdev, port->odp_port,
                             ovs_native_tunneling_is_on(ofproto), dp_port_name);
        if (error) {
            atomic_count_dec(&ofproto->backer->tnl_count);
            dpif_port_destroy(&dpif_port);
            return error;
        }

        port->is_tunnel = true;
        if (ofproto->ipfix) {
           dpif_ipfix_add_tunnel_port(ofproto->ipfix, port_, port->odp_port);
        }
    } else {
        /* Sanity-check that a mapping doesn't already exist.  This
         * shouldn't happen for non-tunnel ports. */
        if (odp_port_to_ofp_port(ofproto, port->odp_port) != OFPP_NONE) {
            VLOG_ERR("port %s already has an OpenFlow port number",
                     dpif_port.name);
            dpif_port_destroy(&dpif_port);
            return EBUSY;
        }

        /* The map insertion happens under the write lock. */
        ovs_rwlock_wrlock(&ofproto->backer->odp_to_ofport_lock);
        hmap_insert(&ofproto->backer->odp_to_ofport_map, &port->odp_port_node,
                    hash_odp_port(port->odp_port));
        ovs_rwlock_unlock(&ofproto->backer->odp_to_ofport_lock);
    }
    dpif_port_destroy(&dpif_port);

    if (ofproto->sflow) {
        dpif_sflow_add_port(ofproto->sflow, port_, port->odp_port);
    }

    return 0;
}

 

二 udpif接口层采用多个线程处理内核发往用户层的upcall请求,入口函数为udpif_set_threads(),主要处理流程如下:

struct udpif {
    struct ovs_list list_node;         /* In all_udpifs list. */

    struct dpif *dpif;                 /* Datapath handle. */
    struct dpif_backer *backer;        /* Opaque dpif_backer pointer. */

    struct handler *handlers;          /* Upcall handlers. */
    size_t n_handlers;

    struct revalidator *revalidators;  /* Flow revalidators. */
    size_t n_revalidators;

    struct latch exit_latch;           /* Tells child threads to exit. */

    /* Revalidation. */
    struct seq *reval_seq;             /* Incremented to force revalidation. */
    bool reval_exit;                   /* Set by leader on 'exit_latch. */
    struct ovs_barrier reval_barrier;  /* Barrier used by revalidators. */
    struct dpif_flow_dump *dump;       /* DPIF flow dump state. */
    long long int dump_duration;       /* Duration of the last flow dump. */
    struct seq *dump_seq;              /* Increments each dump iteration. */
    atomic_bool enable_ufid;           /* If true, skip dumping flow attrs. */

    /* These variables provide a mechanism for the main thread to pause
     * all revalidation without having to completely shut the threads down.
     * 'pause_latch' is shared between the main thread and the lead
     * revalidator thread, so when it is desirable to halt revalidation, the
     * main thread will set the latch. 'pause' and 'pause_barrier' are shared
     * by revalidator threads. The lead revalidator will set 'pause' when it
     * observes the latch has been set, and this will cause all revalidator
     * threads to wait on 'pause_barrier' at the beginning of the next
     * revalidation round. */
    bool pause;                        /* Set by leader on 'pause_latch. */
    struct latch pause_latch;          /* Set to force revalidators pause. */
/* Bundle of a thread entry point, its argument and a short name.
 * NOTE(review): presumably filled in by ovs_thread_create() and handed to
 * the new thread -- the code that uses it is not shown here; confirm. */
struct ovsthread_aux {
    /* Entry function: takes and returns an opaque pointer. */
    void *(*start)(void *); 
    /* Opaque argument stored alongside 'start'. */
    void *arg;
    /* Name stored in a fixed 16-byte buffer. */
    char name[16];
};
void
udpif_set_threads(struct udpif *udpif, size_t n_handlers,
                  size_t n_revalidators)
{
    ovs_assert(udpif);
    ovs_assert(n_handlers && n_revalidators);

    ovsrcu_quiesce_start();
    if (udpif->n_handlers != n_handlers
        || udpif->n_revalidators != n_revalidators) {
        udpif_stop_threads(udpif);
    }    

    if (!udpif->handlers && !udpif->revalidators) {
        int error;

        error = dpif_handlers_set(udpif->dpif, n_handlers);
        if (error) {
            VLOG_ERR("failed to configure handlers in dpif %s: %s",
                     dpif_name(udpif->dpif), ovs_strerror(error));
            return;
        }

        udpif_start_threads(udpif, n_handlers, n_revalidators); // 启动线程
    }    
    ovsrcu_quiesce_end();
}

 

/* Allocates the handler and revalidator arrays of 'udpif' and spawns one
 * thread per element.  Does nothing when 'udpif' is null or either count
 * is zero.
 *
 * Handler threads run udpif_upcall_handler(); revalidator threads run
 * udpif_revalidator().  Upcalls are enabled on the datapath after the
 * handlers are created and before the revalidators are. */
static void
udpif_start_threads(struct udpif *udpif, size_t n_handlers,
                    size_t n_revalidators)
{
    size_t idx;
    bool use_ufid;

    if (!udpif || !n_handlers || !n_revalidators) {
        return;
    }

    udpif->n_handlers = n_handlers;
    udpif->n_revalidators = n_revalidators;

    /* One handler thread per slot; each receives its own 'struct handler'
     * as the thread argument. */
    udpif->handlers = xzalloc(udpif->n_handlers * sizeof *udpif->handlers);
    for (idx = 0; idx < udpif->n_handlers; idx++) {
        struct handler *h = &udpif->handlers[idx];

        h->udpif = udpif;
        h->handler_id = idx;
        h->thread = ovs_thread_create("handler", udpif_upcall_handler, h);
    }

    use_ufid = ofproto_dpif_get_enable_ufid(udpif->backer);
    atomic_init(&udpif->enable_ufid, use_ufid);
    dpif_enable_upcall(udpif->dpif);

    /* 'reval_barrier' is sized for the revalidators alone; 'pause_barrier'
     * gets one additional participant. */
    ovs_barrier_init(&udpif->reval_barrier, udpif->n_revalidators);
    ovs_barrier_init(&udpif->pause_barrier, udpif->n_revalidators + 1);
    udpif->reval_exit = false;
    udpif->pause = false;

    /* One revalidator thread per slot; element 0 acts as the leader inside
     * udpif_revalidator(). */
    udpif->revalidators = xzalloc(udpif->n_revalidators
                                  * sizeof *udpif->revalidators);
    for (idx = 0; idx < udpif->n_revalidators; idx++) {
        struct revalidator *r = &udpif->revalidators[idx];

        r->udpif = udpif;
        r->thread = ovs_thread_create("revalidator", udpif_revalidator, r);
    }
}
/* Per-thread state of one upcall handler.  udpif_start_threads() fills in
 * one of these for every handler thread and passes it to
 * udpif_upcall_handler() as the thread argument. */
struct handler {
    struct udpif *udpif;               /* Parent udpif. */
    pthread_t thread;                  /* Thread ID. */
    uint32_t handler_id;               /* Handler id: index of this element in
                                        * the parent's 'handlers' array. */
};
/* Thread entry point of an upcall handler.  'arg' is the thread's
 * 'struct handler'.
 *
 * Keeps calling recv_upcalls() until the parent udpif's exit latch is set.
 * When a call reports work done, the next poll_block() is made to return
 * immediately; otherwise the thread sleeps until the datapath has upcalls
 * for this handler id or the exit latch is set.  Always returns NULL. */
static void *
udpif_upcall_handler(void *arg)
{
    struct handler *self = arg;
    struct udpif *owner = self->udpif;

    for (;;) {
        if (latch_is_set(&owner->exit_latch)) {
            break;
        }

        if (!recv_upcalls(self)) {
            /* Nothing received: register the wakeup sources. */
            dpif_recv_wait(owner->dpif, self->handler_id);
            latch_wait(&owner->exit_latch);
        } else {
            /* Something was handled: come straight back for more. */
            poll_immediate_wake();
        }
        poll_block();
    }

    return NULL;
}

 

/* Returns true if 'latch' is set, false otherwise.  Does not change
 * 'latch'.
 *
 * The latch counts as set when its read-side descriptor, fds[0], is
 * readable.  This is tested with a zero-timeout poll(), so the call never
 * blocks; it is retried if interrupted by a signal.  If poll() fails for
 * any other reason the latch is reported as not set. */
bool
latch_is_set(const struct latch *latch)
{
    /* Zero-initialize 'revents': poll() leaves it untouched on failure, and
     * it must not be read while indeterminate. */
    struct pollfd pfd = { .fd = latch->fds[0], .events = POLLIN };
    int retval;

    do {
        retval = poll(&pfd, 1, 0);
    } while (retval < 0 && errno == EINTR);

    return retval > 0 && (pfd.revents & POLLIN) != 0;
}

 

 

revalidator

/* Thread entry point of a revalidator.  'arg' is the thread's
 * 'struct revalidator'.
 *
 * All revalidators of a udpif run this loop in lock step, synchronized by
 * 'reval_barrier'.  The thread whose argument is element 0 of
 * udpif->revalidators acts as the leader: it alone samples the pause and
 * exit latches, creates and destroys the flow dump, adjusts the flow limit
 * and sleeps between rounds.  Each round consists of:
 *
 *   1. leader prepares the round (statistics, latches, flow dump);
 *   2. barrier; optional pause; exit if requested;
 *   3. revalidate(); barrier;
 *   4. revalidator_sweep(); barrier;
 *   5. leader finishes the round and waits for the next one.
 *
 * Returns NULL once the leader has observed the exit latch. */
static void *
udpif_revalidator(void *arg)
{
    /* Used by all revalidators. */
    struct revalidator *revalidator = arg;
    struct udpif *udpif = revalidator->udpif;
    bool leader = revalidator == &udpif->revalidators[0];

    /* Used only by the leader. */
    long long int start_time = 0;
    uint64_t last_reval_seq = 0;
    size_t n_flows = 0;

    revalidator->id = ovsthread_id_self();
    for (;;) {
        if (leader) {
            uint64_t reval_seq;

            recirc_run(); /* Recirculation cleanup. */

            /* Remember the sequence number seen at the start of this round
             * so that the wait at the end of the round wakes up if it has
             * changed in the meantime. */
            reval_seq = seq_read(udpif->reval_seq);
            last_reval_seq = reval_seq;

            /* Track the maximum and a running average of the flow count. */
            n_flows = udpif_get_n_flows(udpif);
            udpif->max_n_flows = MAX(n_flows, udpif->max_n_flows);
            udpif->avg_n_flows = (udpif->avg_n_flows + n_flows) / 2;

            /* Only the leader checks the pause latch to prevent a race where
             * some threads think it's false and proceed to block on
             * reval_barrier and others think it's true and block indefinitely
             * on the pause_barrier */
            udpif->pause = latch_is_set(&udpif->pause_latch);

            /* Only the leader checks the exit latch to prevent a race where
             * some threads think it's true and exit and others think it's
             * false and block indefinitely on the reval_barrier */
            udpif->reval_exit = latch_is_set(&udpif->exit_latch);

            start_time = time_msec();
            /* No dump is created when exiting: every thread leaves the loop
             * below before it would use 'udpif->dump'. */
            if (!udpif->reval_exit) {
                bool terse_dump;

                terse_dump = udpif_use_ufid(udpif);
                udpif->dump = dpif_flow_dump_create(udpif->dpif, terse_dump);
            }
        }

        /* Wait for the leader to start the flow dump. */
        ovs_barrier_block(&udpif->reval_barrier);
        if (udpif->pause) {
            revalidator_pause(revalidator);
        }

        if (udpif->reval_exit) {
            break;
        }
        revalidate(revalidator);

        /* Wait for all flows to have been dumped before we garbage collect. */
        ovs_barrier_block(&udpif->reval_barrier);
        revalidator_sweep(revalidator);

        /* Wait for all revalidators to finish garbage collection. */
        ovs_barrier_block(&udpif->reval_barrier);

        if (leader) {
            unsigned int flow_limit;
            long long int duration;

            atomic_read_relaxed(&udpif->flow_limit, &flow_limit);

            dpif_flow_dump_destroy(udpif->dump);
            seq_change(udpif->dump_seq);

            /* Adapt the flow limit to how long this round took.  'duration'
             * is clamped to at least 1 ms, so the divisions below never
             * divide by zero:
             *   - over 2000 ms: divide the limit by the duration in whole
             *     seconds (at least 2);
             *   - over 1300 ms: reduce the limit to three quarters;
             *   - under 1000 ms with more than 2000 flows, and the limit is
             *     below the flows-per-second rate just achieved: raise the
             *     limit by 1000.
             * The result is kept within [1000, ofproto_flow_limit], with
             * ofproto_flow_limit taking precedence. */
            duration = MAX(time_msec() - start_time, 1);
            udpif->dump_duration = duration;
            if (duration > 2000) {
                flow_limit /= duration / 1000;
            } else if (duration > 1300) {
                flow_limit = flow_limit * 3 / 4;
            } else if (duration < 1000 && n_flows > 2000
                       && flow_limit < n_flows * 1000 / duration) {
                flow_limit += 1000;
            }
            flow_limit = MIN(ofproto_flow_limit, MAX(flow_limit, 1000));
            atomic_store_relaxed(&udpif->flow_limit, flow_limit);

            if (duration > 2000) {
                VLOG_INFO("Spent an unreasonably long %lldms dumping flows",
                          duration);
            }

            /* Sleep until the next round is due: at most
             * MIN(ofproto_max_idle, 500) ms after this round started, or
             * earlier if 'reval_seq' changes or either latch is set. */
            poll_timer_wait_until(start_time + MIN(ofproto_max_idle, 500));
            seq_wait(udpif->reval_seq, last_reval_seq);
            latch_wait(&udpif->exit_latch);
            latch_wait(&udpif->pause_latch);
            poll_block();

            if (!latch_is_set(&udpif->pause_latch) &&
                !latch_is_set(&udpif->exit_latch)) {
                long long int now = time_msec();
                /* Block again if we are woken up within 5ms of the last start
                 * time. */
                start_time += 5;

                if (now < start_time) {
                    poll_timer_wait_until(start_time);
                    latch_wait(&udpif->exit_latch);
                    latch_wait(&udpif->pause_latch);
                    poll_block();
                }
            }
        }
    }

    return NULL;
}