八 ofproto--upcall处理线程
2017-03-24 17:39 yrpapa 阅读(995) 评论(0) 收藏 举报
一 ofproto层通过ofproto_class类(实现是ofproto_dpif_class)实现了openflow的接口,它主要包括如下几个接口类对象:
- ofproto 代表了一个openflow switch的具体实现,是ofproto层的整体结构体;
- ofport代表了一个openflow switch的端口,关联一个netdev设备;
- ofrule代表了一条openflow规则,rule里面包含一组actions;
- ofgroup代表了一个openflow的行为组合(group),OpenFlow 1.1及以上版本支持;

实现类中的up成员代表父类,利用CONTAINER_OF宏可以用up获得实现类对象
ofproto创建流程

1 初始化ofproto
static void bridge_init_ofproto(const struct ovsrec_open_vswitch *cfg) { struct shash iface_hints; static bool initialized = false; int i; if (initialized) { return; } shash_init(&iface_hints); if (cfg) { for (i = 0; i < cfg->n_bridges; i++) { const struct ovsrec_bridge *br_cfg = cfg->bridges[i]; int j; for (j = 0; j < br_cfg->n_ports; j++) { struct ovsrec_port *port_cfg = br_cfg->ports[j]; int k; for (k = 0; k < port_cfg->n_interfaces; k++) { struct ovsrec_interface *if_cfg = port_cfg->interfaces[k]; struct iface_hint *iface_hint; iface_hint = xmalloc(sizeof *iface_hint); iface_hint->br_name = br_cfg->name; iface_hint->br_type = br_cfg->datapath_type; iface_hint->ofp_port = iface_pick_ofport(if_cfg); shash_add(&iface_hints, if_cfg->name, iface_hint); } } } ofproto_init(&iface_hints); // 创建ofproto shash_destroy_free_data(&iface_hints); initialized = true; }
ofproto/ofproto-dpif.c
/* The ofproto-dpif implementation of the ofproto provider interface.
 *
 * The initializer is positional: every entry is matched to a member of
 * 'struct ofproto_class' purely by its position, so the order below must not
 * be changed.  A NULL entry leaves that operation unset; the trailing comment
 * on such an entry names the member it stands in for. */
const struct ofproto_class ofproto_dpif_class = {
    init,
    enumerate_types,
    enumerate_names,
    del,
    port_open_type,
    type_run,
    type_wait,
    alloc,
    construct,
    destruct,
    dealloc,
    run,
    ofproto_dpif_wait,
    NULL,                       /* get_memory_usage. */
    type_get_memory_usage,
    flush,
    query_tables,
    set_tables_version,
    port_alloc,
    port_construct,
    port_destruct,
    port_dealloc,
    port_modified,
    port_reconfigured,
    port_query_by_name,
    port_add,
    port_del,
    port_set_config,
    port_get_stats,
    port_dump_start,
    port_dump_next,
    port_dump_done,
    port_poll,
    port_poll_wait,
    port_is_lacp_current,
    port_get_lacp_stats,
    NULL,                       /* rule_choose_table */
    rule_alloc,
    rule_construct,
    rule_insert,
    NULL,                       /* rule_delete */
    rule_destruct,
    rule_dealloc,
    rule_get_stats,
    rule_execute,
    set_frag_handling,
    packet_out,
    nxt_resume,
    set_netflow,
    get_netflow_ids,
    set_sflow,
    set_ipfix,
    get_ipfix_stats,
    set_cfm,
    cfm_status_changed,
    get_cfm_status,
    set_lldp,
    get_lldp_status,
    set_aa,
    aa_mapping_set,
    aa_mapping_unset,
    aa_vlan_get_queued,
    aa_vlan_get_queue_size,
    set_bfd,
    bfd_status_changed,
    get_bfd_status,
    set_stp,
    get_stp_status,
    set_stp_port,
    get_stp_port_status,
    get_stp_port_stats,
    set_rstp,
    get_rstp_status,
    set_rstp_port,
    get_rstp_port_status,
    set_queues,
    bundle_set,
    bundle_remove,
    mirror_set__,
    mirror_get_stats__,
    set_flood_vlans,
    is_mirror_output_bundle,
    forward_bpdu_changed,
    set_mac_table_config,
    set_mcast_snooping,
    set_mcast_snooping_port,
    NULL,                       /* meter_get_features */
    NULL,                       /* meter_set */
    NULL,                       /* meter_get */
    NULL,                       /* meter_del */
    group_alloc,                /* group_alloc */
    group_construct,            /* group_construct */
    group_destruct,             /* group_destruct */
    group_dealloc,              /* group_dealloc */
    NULL,                       /* group_modify */
    group_get_stats,            /* group_get_stats */
    get_datapath_version,       /* get_datapath_version */
    ct_flush,                   /* ct_flush */
};
void ofproto_init(const struct shash *iface_hints) { struct shash_node *node; size_t i; ofproto_class_register(&ofproto_dpif_class); // 注册实现--ofproto-dpif /* Make a local copy, since we don't own 'iface_hints' elements. */ SHASH_FOR_EACH(node, iface_hints) { const struct iface_hint *orig_hint = node->data; struct iface_hint *new_hint = xmalloc(sizeof *new_hint); const char *br_type = ofproto_normalize_type(orig_hint->br_type); new_hint->br_name = xstrdup(orig_hint->br_name); new_hint->br_type = xstrdup(br_type); new_hint->ofp_port = orig_hint->ofp_port; shash_add(&init_ofp_ports, node->name, new_hint); } for (i = 0; i < n_ofproto_classes; i++) { ofproto_classes[i]->init(&init_ofp_ports); // 调用注册的实现的init方法 } ofproto_unixctl_init(); }
/* Appends 'new_class' to the global 'ofproto_classes' array.
 *
 * Returns 0 on success, or EEXIST if the very same class object is already
 * registered.  The array is grown with x2nrealloc() when it is full. */
int
ofproto_class_register(const struct ofproto_class *new_class)
{
    size_t idx;

    /* Refuse to register the same class twice. */
    for (idx = 0; idx != n_ofproto_classes; ++idx) {
        if (new_class == ofproto_classes[idx]) {
            return EEXIST;
        }
    }

    /* Grow the array if there is no free slot left. */
    if (!(n_ofproto_classes < allocated_ofproto_classes)) {
        ofproto_classes = x2nrealloc(ofproto_classes,
                                     &allocated_ofproto_classes,
                                     sizeof *ofproto_classes);
    }

    ofproto_classes[n_ofproto_classes] = new_class;
    n_ofproto_classes++;
    return 0;
}
/* init() hook of the ofproto-dpif provider.
 *
 * Copies each hint from 'iface_hints' into the file-level 'init_ofp_ports'
 * table (strings are duplicated because the caller owns the originals),
 * then initializes the unixctl commands and the udpif module. */
static void
init(const struct shash *iface_hints)
{
    struct shash_node *hint_node;

    SHASH_FOR_EACH(hint_node, iface_hints) {
        const struct iface_hint *src = hint_node->data;
        struct iface_hint *copy;

        copy = xmalloc(sizeof *copy);
        copy->br_name = xstrdup(src->br_name);
        copy->br_type = xstrdup(src->br_type);
        copy->ofp_port = src->ofp_port;

        shash_add(&init_ofp_ports, hint_node->name, copy);
    }

    ofproto_unixctl_init();
    udpif_init();
}
2 创建ofproto
/* Applies the database configuration 'ovs_cfg' to the bridge layer and
 * pushes it down to the ofproto layer.
 *
 * The work happens in a fixed order:
 *   1. global ofproto settings read from 'other_config' (flow limit, max
 *      idle time, PMD CPU mask, handler/revalidator thread counts);
 *   2. bridge-level bookkeeping (add/delete bridges, collect wanted ports);
 *   3. deletions in the ofproto layer;
 *   4. creation of missing ofprotos via ofproto_create(), then port adds;
 *   5. per-bridge, per-port and per-interface feature configuration;
 *   6. a final bridge_run__() call. */
static void
bridge_reconfigure(const struct ovsrec_open_vswitch *ovs_cfg)
{
    struct sockaddr_in *managers;
    struct bridge *br, *next;
    int sflow_bridge_number;
    size_t n_managers;

    COVERAGE_INC(bridge_reconfigure);

    ofproto_set_flow_limit(smap_get_int(&ovs_cfg->other_config, "flow-limit",
                                        OFPROTO_FLOW_LIMIT_DEFAULT));
    ofproto_set_max_idle(smap_get_int(&ovs_cfg->other_config, "max-idle",
                                      OFPROTO_MAX_IDLE_DEFAULT));
    ofproto_set_cpu_mask(smap_get(&ovs_cfg->other_config, "pmd-cpu-mask"));

    ofproto_set_threads(
        smap_get_int(&ovs_cfg->other_config, "n-handler-threads", 0),
        smap_get_int(&ovs_cfg->other_config, "n-revalidator-threads", 0));

    /* Destroy "struct bridge"s, "struct port"s, and "struct iface"s according
     * to 'ovs_cfg', with only very minimal configuration otherwise.
     *
     * This is mostly an update to bridge data structures. Nothing is pushed
     * down to ofproto or lower layers. */
    add_del_bridges(ovs_cfg);
    HMAP_FOR_EACH (br, node, &all_bridges) {
        bridge_collect_wanted_ports(br, &br->wanted_ports);
        bridge_del_ports(br, &br->wanted_ports);
    }

    /* Start pushing configuration changes down to the ofproto layer:
     *
     *   - Delete ofprotos that are no longer configured.
     *
     *   - Delete ports that are no longer configured.
     *
     *   - Reconfigure existing ports to their desired configurations, or
     *     delete them if not possible.
     *
     * We have to do all the deletions before we can do any additions, because
     * the ports to be added might require resources that will be freed up by
     * deletions (they might especially overlap in name). */
    bridge_delete_ofprotos();
    HMAP_FOR_EACH (br, node, &all_bridges) {
        if (br->ofproto) {
            bridge_delete_or_reconfigure_ports(br);
        }
    }

    /* Finish pushing configuration changes to the ofproto layer:
     *
     *     - Create ofprotos that are missing.
     *
     *     - Add ports that are missing.
     */
    HMAP_FOR_EACH_SAFE (br, next, node, &all_bridges) {
        if (!br->ofproto) {
            int error;

            /* Create the ofproto that backs this bridge. */
            error = ofproto_create(br->name, br->type, &br->ofproto);
            if (error) {
                VLOG_ERR("failed to create bridge %s: %s", br->name,
                         ovs_strerror(error));
                /* The bridge is destroyed inside the loop, which is why the
                 * _SAFE iteration variant is used here. */
                shash_destroy(&br->wanted_ports);
                bridge_destroy(br, true);
            } else {
                /* Trigger storing datapath version. */
                seq_change(connectivity_seq_get());
            }
        }
    }
    HMAP_FOR_EACH (br, node, &all_bridges) {
        bridge_add_ports(br, &br->wanted_ports);
        shash_destroy(&br->wanted_ports);
    }

    reconfigure_system_stats(ovs_cfg);

    /* Complete the configuration. */
    sflow_bridge_number = 0;
    collect_in_band_managers(ovs_cfg, &managers, &n_managers);
    HMAP_FOR_EACH (br, node, &all_bridges) {
        struct port *port;

        /* We need the datapath ID early to allow LACP ports to use it as the
         * default system ID. */
        bridge_configure_datapath_id(br);

        HMAP_FOR_EACH (port, hmap_node, &br->ports) {
            struct iface *iface;

            port_configure(port);

            LIST_FOR_EACH (iface, port_elem, &port->ifaces) {
                iface_set_ofport(iface->cfg, iface->ofp_port);
                /* Clear eventual previous errors */
                ovsrec_interface_set_error(iface->cfg, NULL);
                iface_configure_cfm(iface);
                iface_configure_qos(iface, port->cfg->qos);
                iface_set_mac(br, port, iface);
                ofproto_port_set_bfd(br->ofproto, iface->ofp_port,
                                     &iface->cfg->bfd);
                ofproto_port_set_lldp(br->ofproto, iface->ofp_port,
                                      &iface->cfg->lldp);
                ofproto_port_set_config(br->ofproto, iface->ofp_port,
                                        &iface->cfg->other_config);
            }
        }
        bridge_configure_mirrors(br);
        bridge_configure_forward_bpdu(br);
        bridge_configure_mac_table(br);
        bridge_configure_mcast_snooping(br);
        bridge_configure_remotes(br, managers, n_managers);
        bridge_configure_netflow(br);
        bridge_configure_sflow(br, &sflow_bridge_number);
        bridge_configure_ipfix(br);
        bridge_configure_spanning_tree(br);
        bridge_configure_tables(br);
        bridge_configure_dp_desc(br);
        bridge_configure_aa(br);
    }
    /* 'managers' was filled in by collect_in_band_managers() above. */
    free(managers);

    /* The ofproto-dpif provider does some final reconfiguration in its
     * ->type_run() function.  We have to call it before notifying the database
     * client that reconfiguration is complete, otherwise there is a very
     * narrow race window in which e.g. ofproto/trace will not recognize the
     * new configuration (sometimes this causes unit test failures). */
    bridge_run__();
}
/* Creates a new ofproto named 'datapath_name' of type 'datapath_type' and
 * stores it in '*ofprotop'.
 *
 * Returns 0 on success.  On failure '*ofprotop' is left NULL and the return
 * value is EAFNOSUPPORT (no registered class handles the normalized type),
 * ENOMEM (the class's alloc() returned NULL), or whatever error the class's
 * construct() hook reported.
 *
 * The generic part of the structure is initialized while holding
 * 'ofproto_mutex'; the provider-specific part is initialized afterwards by
 * the class's construct() hook. */
int
ofproto_create(const char *datapath_name, const char *datapath_type,
               struct ofproto **ofprotop)
    OVS_EXCLUDED(ofproto_mutex)
{
    const struct ofproto_class *class;
    struct ofproto *ofproto;
    int error;
    int i;

    *ofprotop = NULL;

    datapath_type = ofproto_normalize_type(datapath_type);
    /* Look up the provider class that implements this datapath type. */
    class = ofproto_class_find__(datapath_type);
    if (!class) {
        VLOG_WARN("could not create datapath %s of unknown type %s",
                  datapath_name, datapath_type);
        return EAFNOSUPPORT;
    }

    ofproto = class->alloc();
    if (!ofproto) {
        VLOG_ERR("failed to allocate datapath %s of type %s",
                 datapath_name, datapath_type);
        return ENOMEM;
    }

    /* Initialize. */
    ovs_mutex_lock(&ofproto_mutex);
    memset(ofproto, 0, sizeof *ofproto);
    ofproto->ofproto_class = class;
    ofproto->name = xstrdup(datapath_name);
    ofproto->type = xstrdup(datapath_type);
    hmap_insert(&all_ofprotos, &ofproto->hmap_node,
                hash_string(ofproto->name, 0));
    ofproto->datapath_id = 0;
    ofproto->forward_bpdu = false;
    ofproto->fallback_dpid = pick_fallback_dpid();
    ofproto->mfr_desc = NULL;
    ofproto->hw_desc = NULL;
    ofproto->sw_desc = NULL;
    ofproto->serial_desc = NULL;
    ofproto->dp_desc = NULL;
    ofproto->frag_handling = OFPUTIL_FRAG_NORMAL;
    hmap_init(&ofproto->ports);
    hmap_init(&ofproto->ofport_usage);
    shash_init(&ofproto->port_by_name);
    simap_init(&ofproto->ofp_requests);
    ofproto->max_ports = ofp_to_u16(OFPP_MAX);
    ofproto->eviction_group_timer = LLONG_MIN;
    ofproto->tables = NULL;
    ofproto->n_tables = 0;
    ofproto->tables_version = OVS_VERSION_MIN;
    hindex_init(&ofproto->cookies);
    hmap_init(&ofproto->learned_cookies);
    ovs_list_init(&ofproto->expirable);
    ofproto->connmgr = connmgr_create(ofproto, datapath_name, datapath_name);
    guarded_list_init(&ofproto->rule_executes);
    ofproto->min_mtu = INT_MAX;
    cmap_init(&ofproto->groups);
    ovs_mutex_unlock(&ofproto_mutex);
    ofproto->ogf.types = 0xf;
    ofproto->ogf.capabilities = OFPGFC_CHAINING | OFPGFC_SELECT_LIVENESS |
                                OFPGFC_SELECT_WEIGHT;
    /* Same limits for each of the four group-feature slots. */
    for (i = 0; i < 4; i++) {
        ofproto->ogf.max_groups[i] = OFPG_MAX;
        ofproto->ogf.ofpacts[i] = (UINT64_C(1) << N_OFPACTS) - 1;
    }
    tun_metadata_init();

    /* Hand over to the provider for its private initialization. */
    error = ofproto->ofproto_class->construct(ofproto);
    if (error) {
        VLOG_ERR("failed to open datapath %s: %s",
                 datapath_name, ovs_strerror(error));
        /* Tear down the connection manager under the mutex before the
         * rest of the half-built ofproto is destroyed. */
        ovs_mutex_lock(&ofproto_mutex);
        connmgr_destroy(ofproto->connmgr);
        ofproto->connmgr = NULL;
        ovs_mutex_unlock(&ofproto_mutex);
        ofproto_destroy__(ofproto);
        return error;
    }

    /* Check that hidden tables, if any, are at the end. */
    ovs_assert(ofproto->n_tables);
    for (i = 0; i + 1 < ofproto->n_tables; i++) {
        enum oftable_flags flags = ofproto->tables[i].flags;
        enum oftable_flags next_flags = ofproto->tables[i + 1].flags;

        ovs_assert(!(flags & OFTABLE_HIDDEN) || next_flags & OFTABLE_HIDDEN);
    }

    ofproto->datapath_id = pick_datapath_id(ofproto);
    init_ports(ofproto);

    /* Initialize meters table. */
    if (ofproto->ofproto_class->meter_get_features) {
        ofproto->ofproto_class->meter_get_features(ofproto,
                                                   &ofproto->meter_features);
    } else {
        memset(&ofproto->meter_features, 0, sizeof ofproto->meter_features);
    }
    /* One slot more than 'max_meters' is allocated. */
    ofproto->meters = xzalloc((ofproto->meter_features.max_meters + 1)
                              * sizeof(struct meter *));

    /* Set the initial tables version. */
    ofproto_bump_tables_version(ofproto);

    *ofprotop = ofproto;
    return 0;
}
static const struct ofproto_class * ofproto_class_find__(const char *type) { size_t i; for (i = 0; i < n_ofproto_classes; i++) { const struct ofproto_class *class = ofproto_classes[i]; struct sset types; bool found; sset_init(&types); class->enumerate_types(&types); found = sset_contains(&types, type); sset_destroy(&types); if (found) { return class; } } VLOG_WARN("unknown datapath type %s", type); return NULL; }
alloc
/* alloc() hook: allocates a zero-filled 'struct ofproto_dpif' and returns a
 * pointer to its embedded generic 'up' member. */
static struct ofproto *
alloc(void)
{
    struct ofproto_dpif *dpif_ofproto;

    dpif_ofproto = xzalloc(sizeof *dpif_ofproto);
    return &dpif_ofproto->up;
}
construct
/* construct() hook of the ofproto-dpif provider: initializes the
 * provider-private part of 'ofproto_'.
 *
 * Opens the datapath backer for the ofproto's type, resets the per-bridge
 * feature state, consumes the matching entries of 'init_ofp_ports',
 * registers the ofproto in 'all_ofproto_dpifs', creates the flow tables and
 * installs the internal flows.
 *
 * Returns 0 on success, otherwise the error from open_dpif_backer() or
 * add_internal_flows(). */
static int
construct(struct ofproto *ofproto_)
{
    struct ofproto_dpif *ofproto = ofproto_dpif_cast(ofproto_);
    struct shash_node *node, *next;
    int error;

    /* Tunnel module can get used right after the udpif threads are running. */
    ofproto_tunnel_init();

    /* Author's note: this call ends up in udpif_set_threads(), which starts
     * the upcall threads that receive packets from the kernel datapath.
     * NOTE(review): not verifiable from this excerpt -- confirm. */
    error = open_dpif_backer(ofproto->up.type, &ofproto->backer);
    /*
     * Author's call-chain notes:
     * open_dpif_backer-->dpif_create_and_open:ovs-system-->dpif_create-->do_open-->dp_initialize
     * registers dpif_netlink_class-->dpif_netlink_class.open-->dpif_netlink_open-->dpif_netlink_dp_transact-->nl_transact
     */
    if (error) {
        return error;
    }

    uuid_generate(&ofproto->uuid);
    atomic_init(&ofproto->tables_version, OVS_VERSION_MIN);
    ofproto->netflow = NULL;
    ofproto->sflow = NULL;
    ofproto->ipfix = NULL;
    ofproto->stp = NULL;
    ofproto->rstp = NULL;
    ofproto->dump_seq = 0;
    hmap_init(&ofproto->bundles);
    /* Create the MAC learning table. */
    ofproto->ml = mac_learning_create(MAC_ENTRY_DEFAULT_IDLE_TIME);
    ofproto->ms = NULL;
    /* NOTE(review): the author asks what 'mbridge' is; its role is not
     * visible in this excerpt. */
    ofproto->mbridge = mbridge_create();
    ofproto->has_bonded_bundles = false;
    ofproto->lacp_enabled = false;
    ovs_mutex_init_adaptive(&ofproto->stats_mutex);

    guarded_list_init(&ofproto->ams);

    sset_init(&ofproto->ports);
    sset_init(&ofproto->ghost_ports);
    sset_init(&ofproto->port_poll_set);
    ofproto->port_poll_errno = 0;
    ofproto->change_seq = 0;
    ofproto->ams_seq = seq_create();
    ofproto->ams_seqno = seq_read(ofproto->ams_seq);

    /* Consume (and free) every hint that belongs to this bridge; entries are
     * deleted while iterating, hence the _SAFE variant. */
    SHASH_FOR_EACH_SAFE (node, next, &init_ofp_ports) {
        struct iface_hint *iface_hint = node->data;

        if (!strcmp(iface_hint->br_name, ofproto->up.name)) {
            /* Check if the datapath already has this port. */
            if (dpif_port_exists(ofproto->backer->dpif, node->name)) {
                sset_add(&ofproto->ports, node->name);
            }

            free(iface_hint->br_name);
            free(iface_hint->br_type);
            free(iface_hint);
            shash_delete(&init_ofp_ports, node);
        }
    }

    hmap_insert(&all_ofproto_dpifs, &ofproto->all_ofproto_dpifs_node,
                hash_string(ofproto->up.name, 0));
    memset(&ofproto->stats, 0, sizeof ofproto->stats);

    ofproto_init_tables(ofproto_, N_TABLES);
    /* Install the initial (internal) flows. */
    error = add_internal_flows(ofproto);

    /* The table flags are set whether or not add_internal_flows() failed. */
    ofproto->up.tables[TBL_INTERNAL].flags = OFTABLE_HIDDEN | OFTABLE_READONLY;

    return error;
}
初始流表
static int add_internal_flows(struct ofproto_dpif *ofproto) { struct ofpact_controller *controller; uint64_t ofpacts_stub[128 / 8]; struct ofpbuf ofpacts; struct rule *unused_rulep OVS_UNUSED; struct match match; int error; int id; ofpbuf_use_stack(&ofpacts, ofpacts_stub, sizeof ofpacts_stub); id = 1; controller = ofpact_put_CONTROLLER(&ofpacts); controller->max_len = UINT16_MAX; controller->controller_id = 0; controller->reason = OFPR_IMPLICIT_MISS; ofpact_finish_CONTROLLER(&ofpacts, &controller); error = add_internal_miss_flow(ofproto, id++, &ofpacts, &ofproto->miss_rule); if (error) { return error; } ofpbuf_clear(&ofpacts); error = add_internal_miss_flow(ofproto, id++, &ofpacts, &ofproto->no_packet_in_rule); if (error) { return error; } error = add_internal_miss_flow(ofproto, id++, &ofpacts, &ofproto->drop_frags_rule); if (error) { return error; } /* Drop any run away non-recirc rule lookups. Recirc_id has to be * zero when reaching this rule. * * (priority=2), recirc_id=0, actions=drop */ ofpbuf_clear(&ofpacts); match_init_catchall(&match); match_set_recirc_id(&match, 0); error = ofproto_dpif_add_internal_flow(ofproto, &match, 2, 0, &ofpacts, &unused_rulep); return error; }
add_internal_miss_flow(ofproto, id++, &ofpacts, &ofproto->miss_rule)
经过ofproto_flow_mod-->handle_flow_mod__-->ofproto_flow_mod_init-->add_flow_init-->ofproto_rule_create-->rule_construct
创建如下流表:
cookie=0x0, duration=2265.499s, table=0, n_packets=0, n_bytes=0, idle_age=2265, priority=0 actions=NORMAL
init_ports
/* Creates an ofport in 'p' for every port that the port dump over 'p'
 * yields, then discards the 'init_ofp_ports' hints that belong to 'p'.
 *
 * A device name already present in 'p->port_by_name' is skipped with a
 * warning.  If a hint exists for a device, its requested OpenFlow port
 * number is recorded in 'p->ofp_requests' before the device is opened.
 *
 * Always returns 0. */
static int
init_ports(struct ofproto *p)
{
    struct ofproto_port_dump dump;
    struct ofproto_port ofproto_port;
    struct shash_node *node, *next;

    OFPROTO_PORT_FOR_EACH (&ofproto_port, &dump, p) {
        const char *devname = ofproto_port.name;

        if (!shash_find(&p->port_by_name, devname)) {
            struct ofputil_phy_port pp;
            struct netdev *netdev;

            /* Honor a previously requested OpenFlow port number. */
            node = shash_find(&init_ofp_ports, devname);
            if (node) {
                const struct iface_hint *hint = node->data;

                simap_put(&p->ofp_requests, devname,
                          ofp_to_u16(hint->ofp_port));
            }

            netdev = ofport_open(p, &ofproto_port, &pp);
            if (netdev) {
                uint16_t port_no;

                ofport_install(p, netdev, &pp);

                /* Track the highest in-range port number seen so far. */
                port_no = ofp_to_u16(ofproto_port.ofp_port);
                if (port_no < p->max_ports) {
                    p->alloc_port_no = MAX(p->alloc_port_no,
                                           ofp_to_u16(ofproto_port.ofp_port));
                }
            }
        } else {
            VLOG_WARN_RL(&rl, "%s: ignoring duplicate device %s in datapath",
                         p->name, devname);
        }
    }

    /* Free the hints of this bridge; they are no longer needed. */
    SHASH_FOR_EACH_SAFE(node, next, &init_ofp_ports) {
        struct iface_hint *hint = node->data;

        if (strcmp(hint->br_name, p->name) == 0) {
            free(hint->br_name);
            free(hint->br_type);
            free(hint);
            shash_delete(&init_ofp_ports, node);
        }
    }

    return 0;
}
static struct netdev * ofport_open(struct ofproto *ofproto, struct ofproto_port *ofproto_port, struct ofputil_phy_port *pp) { enum netdev_flags flags; struct netdev *netdev; int error; error = netdev_open(ofproto_port->name, ofproto_port->type, &netdev); if (error) { VLOG_WARN_RL(&rl, "%s: ignoring port %s (%"PRIu16") because netdev %s " "cannot be opened (%s)", ofproto->name, ofproto_port->name, ofproto_port->ofp_port, ofproto_port->name, ovs_strerror(error)); return NULL; } if (ofproto_port->ofp_port == OFPP_NONE) { if (!strcmp(ofproto->name, ofproto_port->name)) { ofproto_port->ofp_port = OFPP_LOCAL; } else { ofproto_port->ofp_port = alloc_ofp_port(ofproto, ofproto_port->name); } } pp->port_no = ofproto_port->ofp_port; netdev_get_etheraddr(netdev, &pp->hw_addr); ovs_strlcpy(pp->name, ofproto_port->name, sizeof pp->name); netdev_get_flags(netdev, &flags); pp->config = flags & NETDEV_UP ? 0 : OFPUTIL_PC_PORT_DOWN; pp->state = netdev_get_carrier(netdev) ? 0 : OFPUTIL_PS_LINK_DOWN; netdev_get_features(netdev, &pp->curr, &pp->advertised, &pp->supported, &pp->peer); pp->curr_speed = netdev_features_to_bps(pp->curr, 0) / 1000; pp->max_speed = netdev_features_to_bps(pp->supported, 0) / 1000; return netdev; }
/* Opens the network device named 'name' of the given 'type' and stores a
 * referenced handle in '*netdevp'.
 *
 * An already-open device with the same name is reused and only gets its
 * 'ref_cnt' incremented.  Otherwise the class registered for 'type' (or for
 * "system" when 'type' is NULL or empty) allocates and constructs a new one.
 *
 * Returns 0 on success.  On failure '*netdevp' is set to NULL and the result
 * is EAFNOSUPPORT (no usable class for 'type'), ENOMEM (the class's alloc()
 * returned NULL) or the error returned by the class's construct(). */
int
netdev_open(const char *name, const char *type, struct netdev **netdevp)
    OVS_EXCLUDED(netdev_mutex)
{
    struct netdev *netdev;
    int error;

    netdev_initialize();

    ovs_mutex_lock(&netdev_mutex);
    netdev = shash_find_data(&netdev_shash, name);
    if (!netdev) {
        struct netdev_registered_class *rc;

        rc = netdev_lookup_class(type && type[0] ? type : "system");
        if (rc && ovs_refcount_try_ref_rcu(&rc->refcnt)) {
            netdev = rc->class->alloc();
            if (netdev) {
                memset(netdev, 0, sizeof *netdev);
                netdev->netdev_class = rc->class;
                netdev->name = xstrdup(name);
                netdev->change_seq = 1;
                netdev->reconfigure_seq = seq_create();
                netdev->last_reconfigure_seq =
                    seq_read(netdev->reconfigure_seq);
                netdev->node = shash_add(&netdev_shash, name, netdev);

                /* By default enable one tx and rx queue per netdev. */
                netdev->n_txq = netdev->netdev_class->send ? 1 : 0;
                netdev->n_rxq = netdev->netdev_class->rxq_alloc ? 1 : 0;

                ovs_list_init(&netdev->saved_flags_list);

                error = rc->class->construct(netdev);
                if (!error) {
                    netdev_change_seq_changed(netdev);
                } else {
                    /* Undo everything set up above, including the class
                     * reference taken by ovs_refcount_try_ref_rcu(). */
                    ovs_refcount_unref(&rc->refcnt);
                    seq_destroy(netdev->reconfigure_seq);
                    free(netdev->name);
                    ovs_assert(ovs_list_is_empty(&netdev->saved_flags_list));
                    shash_delete(&netdev_shash, netdev->node);
                    rc->class->dealloc(netdev);
                }
            } else {
                error = ENOMEM;
            }
        } else {
            VLOG_WARN("could not create netdev %s of unknown type %s",
                      name, type);
            error = EAFNOSUPPORT;
        }
    } else {
        /* Device is already open: just take another reference below. */
        error = 0;
    }

    if (!error) {
        netdev->ref_cnt++;
        *netdevp = netdev;
    } else {
        *netdevp = NULL;
    }
    ovs_mutex_unlock(&netdev_mutex);

    return error;
}
注册netdevice
/* Registers the built-in netdev providers.
 *
 * The body is guarded by an 'ovsthread_once', so the registrations run only
 * the first time the guard is entered.  Which providers are registered
 * depends on the platform macros below; the patch vport provider is
 * registered on every platform. */
static void
netdev_initialize(void)
    OVS_EXCLUDED(netdev_mutex)
{
    static struct ovsthread_once once = OVSTHREAD_ONCE_INITIALIZER;

    if (ovsthread_once_start(&once)) {
        fatal_signal_add_hook(restore_all_flags, NULL, NULL, true);
        netdev_vport_patch_register();

#ifdef __linux__
        netdev_register_provider(&netdev_linux_class);
        netdev_register_provider(&netdev_internal_class);
        netdev_register_provider(&netdev_tap_class);
        netdev_vport_tunnel_register();
#endif
#if defined(__FreeBSD__) || defined(__NetBSD__)
        netdev_register_provider(&netdev_tap_class);
        netdev_register_provider(&netdev_bsd_class);
#endif
#ifdef _WIN32
        netdev_register_provider(&netdev_windows_class);
        netdev_register_provider(&netdev_internal_class);
        netdev_vport_tunnel_register();
#endif
        ovsthread_once_done(&once);
    }
}
以system为例
/* The "system" netdev class, built with the NETDEV_LINUX_CLASS() macro from
 * the type name and the four functions that are specific to this class.
 * NOTE(review): the remaining members come from the macro definition, which
 * is not shown here. */
const struct netdev_class netdev_linux_class =
    NETDEV_LINUX_CLASS(
        "system",
        netdev_linux_construct,
        netdev_linux_get_stats,
        netdev_linux_get_features,
        netdev_linux_get_status);
construct
/* construct() hook for Linux netdevs.
 *
 * Runs the common construction step and then reads the interface flags into
 * 'ifi_flags'.  Returns ENODEV when the device does not exist, unless the
 * netdev is of the internal class; returns 0 in every other case (any other
 * get_flags() error is ignored). */
static int
netdev_linux_construct(struct netdev *netdev_)
{
    struct netdev_linux *dev = netdev_linux_cast(netdev_);
    int flags_err;

    netdev_linux_common_construct(dev);

    flags_err = get_flags(&dev->up, &dev->ifi_flags);

    /* "Internal" netdevs have to be created as netdev objects before
     * they exist in the kernel, because creating them in the kernel
     * happens by passing a netdev object to dpif_port_add().
     * Therefore a missing device is only an error for other classes. */
    if (flags_err == ENODEV
        && dev->up.netdev_class != &netdev_internal_class) {
        /* The device does not exist, so don't allow it to be opened. */
        return ENODEV;
    }

    return 0;
}
ofport_install
static int ofport_install(struct ofproto *p, struct netdev *netdev, const struct ofputil_phy_port *pp) { const char *netdev_name = netdev_get_name(netdev); struct ofport *ofport; int error; /* Create ofport. */ ofport = p->ofproto_class->port_alloc(); if (!ofport) { error = ENOMEM; goto error; } ofport->ofproto = p; ofport->netdev = netdev; ofport->change_seq = netdev_get_change_seq(netdev); ofport->pp = *pp; ofport->ofp_port = pp->port_no; ofport->created = time_msec(); /* Add port to 'p'. */ hmap_insert(&p->ports, &ofport->hmap_node, hash_ofp_port(ofport->ofp_port)); shash_add(&p->port_by_name, netdev_name, ofport); update_mtu(p, ofport); /* Let the ofproto_class initialize its private data. */ error = p->ofproto_class->port_construct(ofport); if (error) { goto error; } connmgr_send_port_status(p->connmgr, NULL, pp, OFPPR_ADD); return 0; error: VLOG_WARN_RL(&rl, "%s: could not add port %s (%s)", p->name, netdev_name, ovs_strerror(error)); if (ofport) { ofport_destroy__(ofport); } else { netdev_close(netdev); } return error; }
port_alloc
/* port_alloc() hook: allocates a zero-filled 'struct ofport_dpif' and
 * returns a pointer to its embedded generic 'up' member. */
static struct ofport *
port_alloc(void)
{
    struct ofport_dpif *dpif_port;

    dpif_port = xzalloc(sizeof *dpif_port);
    return &dpif_port->up;
}
port_construct
/* port_construct() hook of the ofproto-dpif provider: initializes the
 * provider-private part of 'port_'.
 *
 * Patch ports get ODPP_NONE as their datapath port number and return early.
 * For all other ports the datapath port is looked up by name; tunnel ports
 * are then registered with the tunnel module, while non-tunnel ports are
 * entered into the backer's odp-to-ofport map.
 *
 * Returns 0 on success, the error from dpif_port_query_by_name() or
 * tnl_port_add(), or EBUSY if the datapath port already maps to an OpenFlow
 * port. */
static int
port_construct(struct ofport *port_)
{
    struct ofport_dpif *port = ofport_dpif_cast(port_);
    struct ofproto_dpif *ofproto = ofproto_dpif_cast(port->up.ofproto);
    const struct netdev *netdev = port->up.netdev;
    char namebuf[NETDEV_VPORT_NAME_BUFSIZE];
    const char *dp_port_name;
    struct dpif_port dpif_port;
    int error;

    /* Flag the backer for revalidation because the port set changed. */
    ofproto->backer->need_revalidate = REV_RECONFIGURE;
    port->bundle = NULL;
    port->cfm = NULL;
    port->bfd = NULL;
    port->lldp = NULL;
    port->may_enable = false;
    port->stp_port = NULL;
    port->stp_state = STP_DISABLED;
    port->rstp_port = NULL;
    port->rstp_state = RSTP_DISABLED;
    port->is_tunnel = false;
    port->peer = NULL;
    port->qdscp = NULL;
    port->n_qdscp = 0;
    port->carrier_seq = netdev_get_carrier_resets(netdev);
    port->is_layer3 = netdev_vport_is_layer3(netdev);

    if (netdev_vport_is_patch(netdev)) {
        /* By bailing out here, we don't submit the port to the sFlow module
         * to be considered for counter polling export.  This is correct
         * because the patch port represents an interface that sFlow considers
         * to be "internal" to the switch as a whole, and therefore not a
         * candidate for counter polling. */
        port->odp_port = ODPP_NONE;
        ofport_update_peer(port);
        return 0;
    }

    dp_port_name = netdev_vport_get_dpif_port(netdev, namebuf, sizeof namebuf);
    error = dpif_port_query_by_name(ofproto->backer->dpif, dp_port_name,
                                    &dpif_port);
    if (error) {
        return error;
    }

    /* From here on 'dpif_port' is released with dpif_port_destroy() on
     * every exit path. */
    port->odp_port = dpif_port.port_no;

    if (netdev_get_tunnel_config(netdev)) {
        atomic_count_inc(&ofproto->backer->tnl_count);
        error = tnl_port_add(port, port->up.netdev, port->odp_port,
                             ovs_native_tunneling_is_on(ofproto), dp_port_name);
        if (error) {
            /* Roll back the tunnel count incremented just above. */
            atomic_count_dec(&ofproto->backer->tnl_count);
            dpif_port_destroy(&dpif_port);
            return error;
        }

        port->is_tunnel = true;
        if (ofproto->ipfix) {
           dpif_ipfix_add_tunnel_port(ofproto->ipfix, port_, port->odp_port);
        }
    } else {
        /* Sanity-check that a mapping doesn't already exist.  This
         * shouldn't happen for non-tunnel ports.
         */
        if (odp_port_to_ofp_port(ofproto, port->odp_port) != OFPP_NONE) {
            VLOG_ERR("port %s already has an OpenFlow port number",
                     dpif_port.name);
            dpif_port_destroy(&dpif_port);
            return EBUSY;
        }

        /* The map is modified under its write lock. */
        ovs_rwlock_wrlock(&ofproto->backer->odp_to_ofport_lock);
        hmap_insert(&ofproto->backer->odp_to_ofport_map, &port->odp_port_node,
                    hash_odp_port(port->odp_port));
        ovs_rwlock_unlock(&ofproto->backer->odp_to_ofport_lock);
    }
    dpif_port_destroy(&dpif_port);

    if (ofproto->sflow) {
        dpif_sflow_add_port(ofproto->sflow, port_, port->odp_port);
    }

    return 0;
}
二 udpif接口层采用多个线程处理内核发往用户层的upcall请求,入口函数为udpif_set_threads(),主要处理流程如下:

/* Per-datapath upcall state: the handler threads and the revalidator
 * threads, plus the latches, barriers and sequence numbers that coordinate
 * them. */
struct udpif {
    struct ovs_list list_node;         /* In all_udpifs list. */

    struct dpif *dpif;                 /* Datapath handle. */
    struct dpif_backer *backer;        /* Opaque dpif_backer pointer. */

    struct handler *handlers;          /* Upcall handlers. */
    size_t n_handlers;                 /* Number of elements in 'handlers'. */

    struct revalidator *revalidators;  /* Flow revalidators. */
    size_t n_revalidators;             /* Number of elements in
                                        * 'revalidators'. */

    struct latch exit_latch;           /* Tells child threads to exit. */

    /* Revalidation. */
    struct seq *reval_seq;             /* Incremented to force revalidation. */
    bool reval_exit;                   /* Set by leader on 'exit_latch'. */
    struct ovs_barrier reval_barrier;  /* Barrier used by revalidators. */
    struct dpif_flow_dump *dump;       /* DPIF flow dump state. */
    long long int dump_duration;       /* Duration of the last flow dump. */
    struct seq *dump_seq;              /* Increments each dump iteration. */
    atomic_bool enable_ufid;           /* If true, skip dumping flow attrs. */

    /* These variables provide a mechanism for the main thread to pause
     * all revalidation without having to completely shut the threads down.
     * 'pause_latch' is shared between the main thread and the lead
     * revalidator thread, so when it is desirable to halt revalidation, the
     * main thread will set the latch. 'pause' and 'pause_barrier' are shared
     * by revalidator threads. The lead revalidator will set 'pause' when it
     * observes the latch has been set, and this will cause all revalidator
     * threads to wait on 'pause_barrier' at the beginning of the next
     * revalidation round. */
    bool pause;                        /* Set by leader on 'pause_latch'. */
    struct latch pause_latch;          /* Set to force revalidators pause. */

    /* NOTE(review): the excerpt stops here.  The closing "};" and any further
     * members (code elsewhere in this file uses 'pause_barrier',
     * 'flow_limit', 'max_n_flows' and 'avg_n_flows') are not shown. */
/* Bundle of a start routine, its argument and a short name.
 * NOTE(review): presumably handed to a newly created thread so that it can
 * name itself before calling 'start(arg)' -- confirm against the thread
 * creation code, which is not part of this excerpt. */
struct ovsthread_aux {
    void *(*start)(void *);     /* Function taking and returning 'void *'. */
    void *arg;                  /* Presumably the argument for 'start'. */
    char name[16];              /* Name buffer of 16 bytes. */
};
void udpif_set_threads(struct udpif *udpif, size_t n_handlers, size_t n_revalidators) { ovs_assert(udpif); ovs_assert(n_handlers && n_revalidators); ovsrcu_quiesce_start(); if (udpif->n_handlers != n_handlers || udpif->n_revalidators != n_revalidators) { udpif_stop_threads(udpif); } if (!udpif->handlers && !udpif->revalidators) { int error; error = dpif_handlers_set(udpif->dpif, n_handlers); if (error) { VLOG_ERR("failed to configure handlers in dpif %s: %s", dpif_name(udpif->dpif), ovs_strerror(error)); return; } udpif_start_threads(udpif, n_handlers, n_revalidators); // 启动线程 } ovsrcu_quiesce_end(); }
static void udpif_start_threads(struct udpif *udpif, size_t n_handlers, size_t n_revalidators) { if (udpif && n_handlers && n_revalidators) { size_t i; bool enable_ufid; udpif->n_handlers = n_handlers; udpif->n_revalidators = n_revalidators; udpif->handlers = xzalloc(udpif->n_handlers * sizeof *udpif->handlers); for (i = 0; i < udpif->n_handlers; i++) { struct handler *handler = &udpif->handlers[i]; handler->udpif = udpif; handler->handler_id = i; handler->thread = ovs_thread_create( "handler", udpif_upcall_handler, handler); // 创建handler线程,线程方法udpif_upcall_handler } enable_ufid = ofproto_dpif_get_enable_ufid(udpif->backer); atomic_init(&udpif->enable_ufid, enable_ufid); dpif_enable_upcall(udpif->dpif); ovs_barrier_init(&udpif->reval_barrier, udpif->n_revalidators); ovs_barrier_init(&udpif->pause_barrier, udpif->n_revalidators + 1); udpif->reval_exit = false; udpif->pause = false; udpif->revalidators = xzalloc(udpif->n_revalidators * sizeof *udpif->revalidators); for (i = 0; i < udpif->n_revalidators; i++) { struct revalidator *revalidator = &udpif->revalidators[i]; revalidator->udpif = udpif; revalidator->thread = ovs_thread_create( "revalidator", udpif_revalidator, revalidator); // 创建revalidator线程,线程方法udpif_revalidator,有什么用? } } }
/* State of one upcall handler thread.  An array of these lives in
 * 'struct udpif' ('handlers'); udpif_start_threads() fills in every member
 * when it creates the thread. */
struct handler {
    struct udpif *udpif;               /* Parent udpif. */
    pthread_t thread;                  /* Thread ID. */
    uint32_t handler_id;               /* Handler id. */
};
/* Main loop of an upcall handler thread; 'arg' is its 'struct handler'.
 *
 * Repeats until the udpif's exit latch is set: processes pending upcalls via
 * recv_upcalls(); if that reports a nonzero result the loop is re-run
 * immediately, otherwise the thread arranges to be woken by either new
 * upcalls for this handler or the exit latch.  Always returns NULL. */
static void *
udpif_upcall_handler(void *arg)
{
    struct handler *self = arg;
    struct udpif *udpif = self->udpif;

    for (;;) {
        if (latch_is_set(&self->udpif->exit_latch)) {
            break;
        }

        if (!recv_upcalls(self)) {
            /* Nothing more to do right now: sleep until something arrives
             * or we are asked to exit. */
            dpif_recv_wait(udpif->dpif, self->handler_id);
            latch_wait(&udpif->exit_latch);
        } else {
            poll_immediate_wake();
        }
        poll_block();
    }

    return NULL;
}
/* Returns true if 'latch' is currently set, false otherwise.
 *
 * The check is a zero-timeout poll() for readability on the latch's first
 * file descriptor, so it never blocks and does not change the latch's state.
 * The call is retried while it is interrupted by a signal (EINTR).
 *
 * If poll() fails for any other reason the latch is reported as not set:
 * 'revents' is cleared beforehand and only trusted when poll() reports a
 * ready descriptor, so no indeterminate value is ever read. */
bool
latch_is_set(const struct latch *latch)
{
    struct pollfd pfd;
    int retval;

    pfd.fd = latch->fds[0];
    pfd.events = POLLIN;
    pfd.revents = 0;
    do {
        retval = poll(&pfd, 1, 0);
    } while (retval < 0 && errno == EINTR);

    return retval > 0 && (pfd.revents & POLLIN) != 0;
}
revalidator
/* Main loop of a revalidator thread; 'arg' is its 'struct revalidator'.
 *
 * The revalidator at index 0 of 'udpif->revalidators' acts as the leader.
 * Each round the leader samples the flow count and the pause/exit latches and
 * starts a flow dump; then all revalidators meet at 'reval_barrier', run
 * revalidate(), meet again, run revalidator_sweep(), and meet a third time.
 * Afterwards the leader destroys the dump, adjusts 'flow_limit' according to
 * how long the round took, and sleeps until the next round is due.
 *
 * The loop ends, and NULL is returned, once the leader has published
 * 'reval_exit'. */
static void *
udpif_revalidator(void *arg)
{
    /* Used by all revalidators. */
    struct revalidator *revalidator = arg;
    struct udpif *udpif = revalidator->udpif;
    bool leader = revalidator == &udpif->revalidators[0];

    /* Used only by the leader. */
    long long int start_time = 0;
    uint64_t last_reval_seq = 0;
    size_t n_flows = 0;

    revalidator->id = ovsthread_id_self();
    for (;;) {
        if (leader) {
            uint64_t reval_seq;

            recirc_run(); /* Recirculation cleanup. */

            reval_seq = seq_read(udpif->reval_seq);
            last_reval_seq = reval_seq;

            /* Track the maximum and a running average of the flow count. */
            n_flows = udpif_get_n_flows(udpif);
            udpif->max_n_flows = MAX(n_flows, udpif->max_n_flows);
            udpif->avg_n_flows = (udpif->avg_n_flows + n_flows) / 2;

            /* Only the leader checks the pause latch to prevent a race where
             * some threads think it's false and proceed to block on
             * reval_barrier and others think it's true and block indefinitely
             * on the pause_barrier */
            udpif->pause = latch_is_set(&udpif->pause_latch);

            /* Only the leader checks the exit latch to prevent a race where
             * some threads think it's true and exit and others think it's
             * false and block indefinitely on the reval_barrier */
            udpif->reval_exit = latch_is_set(&udpif->exit_latch);

            start_time = time_msec();
            if (!udpif->reval_exit) {
                bool terse_dump;

                terse_dump = udpif_use_ufid(udpif);
                udpif->dump = dpif_flow_dump_create(udpif->dpif, terse_dump);
            }
        }

        /* Wait for the leader to start the flow dump. */
        ovs_barrier_block(&udpif->reval_barrier);
        if (udpif->pause) {
            revalidator_pause(revalidator);
        }

        if (udpif->reval_exit) {
            break;
        }
        revalidate(revalidator);

        /* Wait for all flows to have been dumped before we garbage collect. */
        ovs_barrier_block(&udpif->reval_barrier);
        revalidator_sweep(revalidator);

        /* Wait for all revalidators to finish garbage collection. */
        ovs_barrier_block(&udpif->reval_barrier);

        if (leader) {
            unsigned int flow_limit;
            long long int duration;

            atomic_read_relaxed(&udpif->flow_limit, &flow_limit);

            dpif_flow_dump_destroy(udpif->dump);
            seq_change(udpif->dump_seq);

            /* Clamp to at least 1 ms; 'duration' is used as a divisor
             * below. */
            duration = MAX(time_msec() - start_time, 1);
            udpif->dump_duration = duration;
            /* Adapt the flow limit to the round time: divide it by the
             * number of whole seconds for rounds over 2000 ms, cut it to
             * three quarters for rounds over 1300 ms, and raise it by 1000
             * for rounds under 1000 ms when the extra conditions below
             * hold. */
            if (duration > 2000) {
                flow_limit /= duration / 1000;
            } else if (duration > 1300) {
                flow_limit = flow_limit * 3 / 4;
            } else if (duration < 1000 && n_flows > 2000
                       && flow_limit < n_flows * 1000 / duration) {
                flow_limit += 1000;
            }
            /* Keep the result between 1000 and 'ofproto_flow_limit'. */
            flow_limit = MIN(ofproto_flow_limit, MAX(flow_limit, 1000));
            atomic_store_relaxed(&udpif->flow_limit, flow_limit);

            if (duration > 2000) {
                VLOG_INFO("Spent an unreasonably long %lldms dumping flows",
                          duration);
            }

            /* Sleep until the next round is due, a revalidation is forced,
             * or the exit or pause latch is set. */
            poll_timer_wait_until(start_time + MIN(ofproto_max_idle, 500));
            seq_wait(udpif->reval_seq, last_reval_seq);
            latch_wait(&udpif->exit_latch);
            latch_wait(&udpif->pause_latch);
            poll_block();

            if (!latch_is_set(&udpif->pause_latch) &&
                !latch_is_set(&udpif->exit_latch)) {
                long long int now = time_msec();
                /* Block again if we are woken up within 5ms of the last start
                 * time. */
                start_time += 5;

                if (now < start_time) {
                    poll_timer_wait_until(start_time);
                    latch_wait(&udpif->exit_latch);
                    latch_wait(&udpif->pause_latch);
                    poll_block();
                }
            }
        }
    }

    return NULL;
}
浙公网安备 33010602011771号