linux内核情景分析之强制性调度
从系统调用返回到用户空间是否调度,从ret_with_reschedule可看出,是否真正调度,取决于当前进程的pcb中的need_resched是否设置为1,那如何设置为1取决于以下几种情况:
时间中断处理程序,发现当前进程运行时间过长:每次发生时间中断,都要递减该进程的时间片,一旦count为0,强制调度,剥夺当前进程运行
void update_process_times(int user_tick){struct task_struct *p = current;int cpu = smp_processor_id(), system = user_tick ^ 1;update_one_process(p, user_tick, system, cpu);//统计信息而已if (p->pid) {if (--p->counter <= 0) {p->counter = 0;p->need_resched = 1;//强制调度}if (p->nice > 0)kstat.per_cpu_nice[cpu] += user_tick;elsekstat.per_cpu_user[cpu] += user_tick;kstat.per_cpu_system[cpu] += system;} else if (local_bh_count(cpu) || local_irq_count(cpu) > 1)kstat.per_cpu_system[cpu] += system;}
如果此时在用户态发生中断,进入内核态,p->counter减为0,那么p->need_resched就置为1,中断返回时就会强制调度。
如果此时发生系统调用,进入内核态,再发生中断,p->counter减为0,那么p->need_resched就置为1,中断返回后,然后系统调用返回时就会强制调度。
如果此时在用户态发生异常,进入内核态,再发生中断,p->counter减为0,那么p->need_resched就置为1,中断返回后,然后异常返回时就会强制调度。
第二种情况
唤醒一个睡眠进程,发现被唤醒的进程比当前进程权值高,need_sched设置为1
/** Wake up a process. Put it on the run-queue if it's not* already there. The "current" process is always on the* run-queue (except when the actual re-schedule is in* progress), and as such you're allowed to do the simpler* "current->state = TASK_RUNNING" to mark yourself runnable* without the overhead of this.*/inline void wake_up_process(struct task_struct * p){unsigned long flags;/** We want the common case fall through straight, thus the goto.*/spin_lock_irqsave(&runqueue_lock, flags);p->state = TASK_RUNNING;//设置为可执行状态if (task_on_runqueue(p))//如果已经到run队列goto out;add_to_runqueue(p);//加入run队列reschedule_idle(p);//将唤醒进程与当前进程比较,如果唤醒进程比当前进程权值高,那就把当前进程的need_resched设置为1out:spin_unlock_irqrestore(&runqueue_lock, flags);}
static void reschedule_idle(struct task_struct * p){......int this_cpu = smp_processor_id();struct task_struct *tsk;tsk = cpu_curr(this_cpu);//获取当前进程的task_struct数据结构if (preemption_goodness(tsk, p, this_cpu) > 1)//比较当前进程和被唤醒的进程的综合权值tsk->need_resched = 1;//如果被唤醒的进程的综合权值比当前进程的大,那么强制调度}
对于第三种情况,实际上应被视为自愿的让出。但是,从内核代码的形式上看,也是通过相同的办法,将当前进程的need_resched标志置为1,使得在进程返回用户空间前夕发生调度,所以也放在这一节。此类系统调用有两个,一个是sched_setscheduler(),另一个是sched_yield()。
系统调用sched_setscheduler()的作用是改变进程的调度政策。用户登录到系统后,第一个进程的适用调度政策为SCHED_OTHER,也就是默认为无实时要求的交互式应用。在fork()创建新进程时则将此进程适用的调度政策遗传给了子进程。但是,用户可以通过系统调用sched_setscheduler()改变其适用调度政策。
sched_setscheduler,内核态对应的代码如下:
asmlinkage long sys_sched_setscheduler(pid_t pid, int policy,struct sched_param *param){return setscheduler(pid, policy, param);}asmlinkage long sys_sched_setparam(pid_t pid, struct sched_param *param){return setscheduler(pid, -1, param);}
static int setscheduler(pid_t pid, int policy,struct sched_param *param){struct sched_param lp;struct task_struct *p;int retval;retval = -EINVAL;if (!param || pid < 0)goto out_nounlock;retval = -EFAULT;if (copy_from_user(&lp, param, sizeof(struct sched_param)))//从用户空间把sched_param结构拷贝到lpgoto out_nounlock;/** We play safe to avoid deadlocks.*/read_lock_irq(&tasklist_lock);spin_lock(&runqueue_lock);p = find_process_by_pid(pid);//通过pid找到task_structretval = -ESRCH;if (!p)goto out_unlock;if (policy < 0)//policy为-1policy = p->policy;//维持原来的政策else {retval = -EINVAL;if (policy != SCHED_FIFO && policy != SCHED_RR &&policy != SCHED_OTHER)//必须是这三种政策之一goto out_unlock;}/** Valid priorities for SCHED_FIFO and SCHED_RR are 1..99, valid* priority for SCHED_OTHER is 0.*/retval = -EINVAL;if (lp.sched_priority < 0 || lp.sched_priority > 99)//实时进程的priority必须处于0-99goto out_unlock;if ((policy == SCHED_OTHER) != (lp.sched_priority == 0))//如果政策是SCHED_OTHER,sched_priority必须是0goto out_unlock;retval = -EPERM;if ((policy == SCHED_FIFO || policy == SCHED_RR) &&!capable(CAP_SYS_NICE))goto out_unlock;if ((current->euid != p->euid) && (current->euid != p->uid) &&!capable(CAP_SYS_NICE))goto out_unlock;retval = 0;p->policy = policy;p->rt_priority = lp.sched_priority;if (task_on_runqueue(p))move_first_runqueue(p);//从可执行进程队列的当前位置移到队列的前部,使其在调度时处于较为有利的地位current->need_resched = 1;//强制调度out_unlock:spin_unlock(&runqueue_lock);read_unlock_irq(&tasklist_lock);out_nounlock:return retval;}
另一个系统调用sched_yield(),使运行中的进程可以为其他进程"让路",但并不进入睡眠。内核的实现sys_sched_yield,代码如下:
asmlinkage long sys_sched_yield(void){/** Trick. sched_yield() first counts the number of truly* 'pending' runnable processes, then returns if it's* only the current processes. (This test does not have* to be atomic.) In threaded applications this optimization* gets triggered quite often.*/int nr_pending = nr_running;#if CONFIG_SMPint i;// Substract non-idle processes running on other CPUs.for (i = 0; i < smp_num_cpus; i++)if (aligned_data[i].schedule_data.curr != idle_task(i))nr_pending--;#else// on UP this process is on the runqueue as wellnr_pending--;#endifif (nr_pending) {//正在等待的运行的进程数/** This process can only be rescheduled by us,* so this is safe without any locking.*/if (current->policy == SCHED_OTHER)//当前进程调度策略为sched_othercurrent->policy |= SCHED_YIELD;//SCHED_YIELD标志位置1,在_schedule_tail清0current->need_resched = 1;//强制调度}return 0;}
浙公网安备 33010602011771号