rps ipi

Linux SMP multi-core call example

On a multi-core system the kernel boots on a single core and is then scheduled across the other cores. In particular, a kernel module (driver) runs its initialization only once, on whichever core loads it. When code needs to run on every core, or on one specific core, the multi-core call interfaces provided by SMP are required.

SMP-related APIs

linux/smp.h defines the cross-core call functions and their related data structures.

Run func on every core; info is the argument passed to func.

void on_each_cpu(smp_call_func_t func, void *info, int wait);

Run func on every core in the given cpumask; info is the argument passed to func.

void on_each_cpu_mask(const struct cpumask *mask, smp_call_func_t func, void *info, bool wait);

Run func on every core except the calling one.

void smp_call_function(smp_call_func_t func, void *info, int wait);

Run func on the cores in the given cpumask, excluding the calling core.

void smp_call_function_many(const struct cpumask *mask, smp_call_func_t func, void *info, bool wait);

Run func once on any one core from the given cpumask.

int smp_call_function_any(const struct cpumask *mask, smp_call_func_t func, void *info, int wait);

Run func once on the core specified by cpuid.

int smp_call_function_single(int cpuid, smp_call_func_t func, void *info, int wait);

The kernel also defines several global cpumask variables for selecting cores (a usage sketch of these APIs follows the list below).

  • cpu_possible_mask- has bit ‘cpu’ set iff cpu is populatable
  • cpu_present_mask - has bit ‘cpu’ set iff cpu is populated
  • cpu_online_mask - has bit ‘cpu’ set iff cpu available to scheduler
  • cpu_active_mask - has bit ‘cpu’ set iff cpu available to migration
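
To make the interfaces above concrete, here is a minimal sketch of a kernel module that calls them. It is illustrative only: print_cpu_info and the smp_demo_* names are made up, and the exact prototypes (for example whether on_each_cpu() and smp_call_function() return int or void) differ between kernel versions.

#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/smp.h>
#include <linux/cpumask.h>

/* Hypothetical callback: runs in IPI context on each target CPU,
 * so it must be short and must not sleep. */
static void print_cpu_info(void *info)
{
    pr_info("%s: running on cpu %d\n", (char *)info, smp_processor_id());
}

static int __init smp_demo_init(void)
{
    /* Run on every online CPU, including this one; wait until all finish. */
    on_each_cpu(print_cpu_info, "on_each_cpu", 1);

    /* Run once on CPU 1, if it is online. */
    if (cpu_online(1))
        smp_call_function_single(1, print_cpu_info, "cpu 1 only", 1);

    /* Run on every online CPU except the calling one. */
    smp_call_function(print_cpu_info, "everyone but me", 1);

    return 0;
}

static void __exit smp_demo_exit(void)
{
}

module_init(smp_demo_init);
module_exit(smp_demo_exit);
MODULE_LICENSE("GPL");

The rest of this post looks at a real in-kernel user of the single-CPU variant: RPS (Receive Packet Steering) in net/core/dev.c, which batches up the remote CPUs that still have backlog packets to process and then kicks each of them with an IPI through a per-CPU call_single_data.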

 

/* net/core/dev.c: decide whether the target softnet_data belongs to a remote CPU
 * that must be poked with an IPI later. */
static int rps_ipi_queued(struct softnet_data *sd)
{
#ifdef CONFIG_RPS
    struct softnet_data *mysd = &__get_cpu_var(softnet_data);

    if (sd != mysd) {
        /* Remote CPU: link its softnet_data onto the local rps_ipi_list and
         * raise NET_RX_SOFTIRQ; the actual IPI is sent later from
         * net_rps_action_and_irq_enable(). */
        sd->rps_ipi_next = mysd->rps_ipi_list;
        mysd->rps_ipi_list = sd;

        __raise_softirq_irqoff(NET_RX_SOFTIRQ);
        return 1;
    }
#endif /* CONFIG_RPS */
    return 0;
}

 

// From enqueue_to_backlog(): when the first packet lands on the input_pkt_queue of the
// CPU chosen by RPS, that CPU's backlog NAPI has to be scheduled -- via IPI if it is remote.
        /* Schedule NAPI for backlog device
         * We can use non atomic operation since we own the queue lock
         */
        if (!__test_and_set_bit(NAPI_STATE_SCHED, &sd->backlog.state)) { // backlog not scheduled yet
            if (!rps_ipi_queued(sd))    // remote CPU: sd is linked onto the local rps_ipi_list and the local softirq is raised
                ____napi_schedule(sd, &sd->backlog);    // sd belongs to the local CPU: schedule its backlog NAPI directly
        }

 

/*
 * Incoming packets are placed on per-cpu queues
 */
struct softnet_data {
    struct Qdisc        *output_queue;
    struct Qdisc        **output_queue_tailp;
    struct list_head    poll_list;
    struct sk_buff        *completion_queue;
    struct sk_buff_head    process_queue;

    /* stats */
    unsigned int        processed;
    unsigned int        time_squeeze;
    unsigned int        cpu_collision;
    unsigned int        received_rps;

#ifdef CONFIG_RPS
    struct softnet_data    *rps_ipi_list;

    /* Elements below can be accessed between CPUs for RPS */
    struct call_single_data    csd ____cacheline_aligned_in_smp;
    struct softnet_data    *rps_ipi_next;
    unsigned int        cpu;
    unsigned int        input_queue_head;
    unsigned int        input_queue_tail;
#endif
    unsigned        dropped;
    struct sk_buff_head    input_pkt_queue;
    struct napi_struct    backlog;
};

 

/* Called from hardirq (IPI) context */
static void rps_trigger_softirq(void *data)
{
        struct softnet_data *sd = data;

        ____napi_schedule(sd, &sd->backlog);
        sd->received_rps++;
}
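
rps_trigger_softirq() is the receiving side of the mechanism: net_dev_init() below installs it as sd->csd.func (with sd itself as csd.info) for every CPU's softnet_data, so when a CPU gets the RPS IPI it schedules its own backlog NAPI and bumps its received_rps counter.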

 

 

 

/*
 *       This is called single threaded during boot, so no need
 *       to take the rtnl semaphore.
 */
static int __init net_dev_init(void)
{
        int i, rc = -ENOMEM;

        BUG_ON(!dev_boot_phase);

        if (dev_proc_init())
                goto out;

        if (netdev_kobject_init())
                goto out;

        INIT_LIST_HEAD(&ptype_all);
        for (i = 0; i < PTYPE_HASH_SIZE; i++)
                INIT_LIST_HEAD(&ptype_base[i]);

        INIT_LIST_HEAD(&offload_base);

        if (register_pernet_subsys(&netdev_net_ops))
                goto out;

        /*
         *      Initialise the packet receive queues.
         */

        for_each_possible_cpu(i) {
                struct work_struct *flush = per_cpu_ptr(&flush_works, i);
                struct softnet_data *sd = &per_cpu(softnet_data, i);

                INIT_WORK(flush, flush_backlog);

                skb_queue_head_init(&sd->input_pkt_queue);
                skb_queue_head_init(&sd->process_queue);
#ifdef CONFIG_XFRM_OFFLOAD
                skb_queue_head_init(&sd->xfrm_backlog);
#endif
                INIT_LIST_HEAD(&sd->poll_list);
                sd->output_queue_tailp = &sd->output_queue;
#ifdef CONFIG_RPS
                sd->csd.func = rps_trigger_softirq;
                sd->csd.info = sd;
                sd->cpu = i;
#endif

                init_gro_hash(&sd->backlog);
                sd->backlog.poll = process_backlog;
                sd->backlog.weight = weight_p;
        }

        dev_boot_phase = 0;

        /* The loopback device is special if any other network devices
         * is present in a network namespace the loopback device must
         * be present. Since we now dynamically allocate and free the
         * loopback device ensure this invariant is maintained by
         * keeping the loopback device as the first device on the
         * list of network devices.  Ensuring the loopback devices
         * is the first device that appears and the last network device
         * that disappears.
         */
        if (register_pernet_device(&loopback_net_ops))
                goto out;

        if (register_pernet_device(&default_device_ops))
                goto out;

        open_softirq(NET_TX_SOFTIRQ, net_tx_action);
        open_softirq(NET_RX_SOFTIRQ, net_rx_action);

        rc = cpuhp_setup_state_nocalls(CPUHP_NET_DEV_DEAD, "net/dev:dead",
                                       NULL, dev_cpu_dead);
        WARN_ON(rc < 0);
        rc = 0;
out:
        return rc;
}

 

 

 

 

void __smp_call_function_single(int cpu, struct call_single_data *data,
                int wait)
{
    unsigned int this_cpu;
    unsigned long flags;

    this_cpu = get_cpu();
    /*
     * Can deadlock when called with interrupts disabled.
     * We allow cpu's that are not yet online though, as no one else can
     * send smp call function interrupt to this cpu and as such deadlocks
     * can't happen.
     */
    WARN_ON_ONCE(cpu_online(smp_processor_id()) && wait && irqs_disabled()
             && !oops_in_progress);

    if (cpu == this_cpu) {
        local_irq_save(flags);
        data->func(data->info);
        local_irq_restore(flags);
    } else {
        csd_lock(data);
        generic_exec_single(cpu, data, wait);
    }
    put_cpu();
}

 

 

/*
 * net_rps_action sends any pending IPI's for rps.
 * Note: called with local irq disabled, but exits with local irq enabled.
 */
static void net_rps_action_and_irq_enable(struct softnet_data *sd)
{
#ifdef CONFIG_RPS
    struct softnet_data *remsd = sd->rps_ipi_list;

    if (remsd) {
        sd->rps_ipi_list = NULL;

        local_irq_enable();

        /* Send pending IPI's to kick RPS processing on remote cpus. */
        while (remsd) {
            struct softnet_data *next = remsd->rps_ipi_next;

            if (cpu_online(remsd->cpu))
                __smp_call_function_single(remsd->cpu,
                               &remsd->csd, 0);
            remsd = next;
        }
    } else
#endif
        local_irq_enable();
}

static int process_backlog(struct napi_struct *napi, int quota)
{
    int work = 0;
    struct softnet_data *sd = container_of(napi, struct softnet_data, backlog);

#ifdef CONFIG_RPS
    /* Check if we have pending ipi, its better to send them now,
     * not waiting net_rx_action() end.
     */
    if (sd->rps_ipi_list) {
        local_irq_disable();
        net_rps_action_and_irq_enable(sd);
    }
#endif
    napi->weight = weight_p;
    local_irq_disable();
    while (work < quota) {
        struct sk_buff *skb;
        unsigned int qlen;

        while ((skb = __skb_dequeue(&sd->process_queue))) {
            local_irq_enable();
            __netif_receive_skb(skb);
            local_irq_disable();
            input_queue_head_incr(sd);
            if (++work >= quota) {
                local_irq_enable();
                return work;
            }
        }

        rps_lock(sd);
        qlen = skb_queue_len(&sd->input_pkt_queue);
        if (qlen)
            skb_queue_splice_tail_init(&sd->input_pkt_queue,
                           &sd->process_queue);

        if (qlen < quota - work) {
            /*
             * Inline a custom version of __napi_complete().
             * only current cpu owns and manipulates this napi,
             * and NAPI_STATE_SCHED is the only possible flag set on backlog.
             * we can use a plain write instead of clear_bit(),
             * and we dont need an smp_mb() memory barrier.
             */
            list_del(&napi->poll_list);
            napi->state = 0;

            quota = work + qlen;
        }
        rps_unlock(sd);
    }
    local_irq_enable();

    return work;
}

 

 

static void net_rps_send_ipi(struct softnet_data *remsd)
{
#ifdef CONFIG_RPS
        while (remsd) {
                struct softnet_data *next = remsd->rps_ipi_next;

                if (cpu_online(remsd->cpu))
                        smp_call_function_single_async(remsd->cpu, &remsd->csd);
                remsd = next;
        }
#endif
}
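
In newer kernels the asynchronous helper is smp_call_function_single_async(), which is what net_rps_send_ipi() above uses in place of the older __smp_call_function_single(); the idea is the same: pre-initialize a call_single_data with the callback and its argument, then fire it at the target CPU without waiting. Below is a minimal, hedged sketch of that pattern outside the networking code. The field names (csd.func, csd.info) follow the older style used in this post, the exact type (struct call_single_data vs. call_single_data_t) and preferred initializer depend on the kernel version, and poke_data/remote_poke/poke_cpu are made-up names.

#include <linux/smp.h>
#include <linux/percpu.h>
#include <linux/cpumask.h>

/* Hypothetical per-CPU state, analogous to softnet_data. */
struct poke_data {
    struct call_single_data csd;    /* call_single_data_t on newer kernels */
    unsigned int pokes;
};

static DEFINE_PER_CPU(struct poke_data, poke_data);

/* Runs on the target CPU in IPI context, like rps_trigger_softirq(). */
static void remote_poke(void *info)
{
    struct poke_data *pd = info;

    pd->pokes++;
}

static void poke_init(void)
{
    int cpu;

    for_each_possible_cpu(cpu) {
        struct poke_data *pd = &per_cpu(poke_data, cpu);

        /* Pre-wire the callback once, as net_dev_init() does for sd->csd. */
        pd->csd.func = remote_poke;
        pd->csd.info = pd;
    }
}

/* Fire-and-forget IPI to @cpu; the csd must not be reused until the
 * previous call has completed. */
static void poke_cpu(int cpu)
{
    if (cpu_online(cpu))
        smp_call_function_single_async(cpu, &per_cpu(poke_data, cpu).csd);
}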

 
