2018-2019-1 20189221 《Linux内核原理与分析》第七周作业

2018-2019-1 20189221 《Linux内核原理与分析》第七周作业

实验六 分析Linux内核创建一个新进程的过程

代码分析

task_struct:


struct task_struct { 
volatile long state;        //进程状态/* -1 unrunnable, 0 runnable, >0 stopped */
void *stack;                // 指向进程内核堆栈
pid_t pid;                  //进程标识符
unsigned int rt_priority;   //实时优先级
unsigned int policy;        //调度策略
struct files_struct *files; //系统打开文件
…
}

fork、vfork 和 clone 都可创建新进程,均通过 do_fork 来创建进程

do_fork进程:

新建进程:


/*
1694 * Create a kernel thread.
1695 */
1696pid_t kernel_thread(int (*fn)(void *), void *arg, unsigned long flags)
1697{
1698	return do_fork(flags|CLONE_VM|CLONE_UNTRACED, (unsigned long)fn,
1699		(unsigned long)arg, NULL, NULL);
1700}
1701
1702#ifdef __ARCH_WANT_SYS_FORK
1703SYSCALL_DEFINE0(fork)
1704{
1705#ifdef CONFIG_MMU
1706	return do_fork(SIGCHLD, 0, 0, NULL, NULL);
1707#else
1708	/* can not support in nommu mode */
1709	return -EINVAL;
1710#endif
1711}
1712#endif
1713
1714#ifdef __ARCH_WANT_SYS_VFORK
1715SYSCALL_DEFINE0(vfork)
1716{
1717	return do_fork(CLONE_VFORK | CLONE_VM | SIGCHLD, 0,
1718			0, NULL, NULL);
1719}
1720#endif
1721
1722#ifdef __ARCH_WANT_SYS_CLONE
1723#ifdef CONFIG_CLONE_BACKWARDS
1724SYSCALL_DEFINE5(clone, unsigned long, clone_flags, unsigned long, newsp,
1725		 int __user *, parent_tidptr,
1726		 int, tls_val,
1727		 int __user *, child_tidptr)
1728#elif defined(CONFIG_CLONE_BACKWARDS2)
1729SYSCALL_DEFINE5(clone, unsigned long, newsp, unsigned long, clone_flags,
1730		 int __user *, parent_tidptr,
1731		 int __user *, child_tidptr,
1732		 int, tls_val)
1733#elif defined(CONFIG_CLONE_BACKWARDS3)
1734SYSCALL_DEFINE6(clone, unsigned long, clone_flags, unsigned long, newsp,
1735		int, stack_size,
1736		int __user *, parent_tidptr,
1737		int __user *, child_tidptr,
1738		int, tls_val)
1739#else
1740SYSCALL_DEFINE5(clone, unsigned long, clone_flags, unsigned long, newsp,
1741		 int __user *, parent_tidptr,
1742		 int __user *, child_tidptr,
1743		 int, tls_val)
1744#endif
1745{
1746	return do_fork(clone_flags, newsp, 0, parent_tidptr, child_tidptr);
1747}
1748#endif
1749
1750#ifndef ARCH_MIN_MMSTRUCT_ALIGN
1751#define ARCH_MIN_MMSTRUCT_ALIGN 0
1752#endif
1753
1754static void sighand_ctor(void *data)
1755{
1756	struct sighand_struct *sighand = data;
1757
1758	spin_lock_init(&sighand->siglock);
1759	init_waitqueue_head(&sighand->signalfd_wqh);
1760}
1761
1762void __init proc_caches_init(void)
1763{
1764	sighand_cachep = kmem_cache_create("sighand_cache",
1765			sizeof(struct sighand_struct), 0,
1766			SLAB_HWCACHE_ALIGN|SLAB_PANIC|SLAB_DESTROY_BY_RCU|
1767			SLAB_NOTRACK, sighand_ctor);
1768	signal_cachep = kmem_cache_create("signal_cache",
1769			sizeof(struct signal_struct), 0,
1770			SLAB_HWCACHE_ALIGN|SLAB_PANIC|SLAB_NOTRACK, NULL);
1771	files_cachep = kmem_cache_create("files_cache",
1772			sizeof(struct files_struct), 0,
1773			SLAB_HWCACHE_ALIGN|SLAB_PANIC|SLAB_NOTRACK, NULL);
1774	fs_cachep = kmem_cache_create("fs_cache",
1775			sizeof(struct fs_struct), 0,
1776			SLAB_HWCACHE_ALIGN|SLAB_PANIC|SLAB_NOTRACK, NULL);
1777	/*
1778	 * FIXME! The "sizeof(struct mm_struct)" currently includes the
1779	 * whole struct cpumask for the OFFSTACK case. We could change
1780	 * this to *only* allocate as much of it as required by the
1781	 * maximum number of CPU's we can ever have.  The cpumask_allocation
1782	 * is at the end of the structure, exactly for that reason.
1783	 */
1784	mm_cachep = kmem_cache_create("mm_struct",
1785			sizeof(struct mm_struct), ARCH_MIN_MMSTRUCT_ALIGN,
1786			SLAB_HWCACHE_ALIGN|SLAB_PANIC|SLAB_NOTRACK, NULL);
1787	vm_area_cachep = KMEM_CACHE(vm_area_struct, SLAB_PANIC);
1788	mmap_init();
1789	nsproxy_cache_init();
1790}
1791
1792/*
1793 * Check constraints on flags passed to the unshare system call.
1794 */
1795static int check_unshare_flags(unsigned long unshare_flags)
1796{
1797	if (unshare_flags & ~(CLONE_THREAD|CLONE_FS|CLONE_NEWNS|CLONE_SIGHAND|
1798				CLONE_VM|CLONE_FILES|CLONE_SYSVSEM|
1799				CLONE_NEWUTS|CLONE_NEWIPC|CLONE_NEWNET|
1800				CLONE_NEWUSER|CLONE_NEWPID))
1801		return -EINVAL;
1802	/*
1803	 * Not implemented, but pretend it works if there is nothing to
1804	 * unshare. Note that unsharing CLONE_THREAD or CLONE_SIGHAND
1805	 * needs to unshare vm.
1806	 */
1807	if (unshare_flags & (CLONE_THREAD | CLONE_SIGHAND | CLONE_VM)) {
1808		/* FIXME: get_task_mm() increments ->mm_users */
1809		if (atomic_read(&current->mm->mm_users) > 1)
1810			return -EINVAL;
1811	}
1812
1813	return 0;
1814}
1815

ret_from_fork:

*childregs = *current_pt_regs();  //复制内核堆栈(复制的pt_regs,是SAVE_ALL中系统调用压栈的那一部分。)
childregs->ax = 0;                             //  子进程的fork返回0
     
p->thread.sp = (unsigned long) childregs;       // 调度到子进程时的内核栈顶
p->thread.ip = (unsigned long) ret_from_fork;    //调度到子进程时的第一条指令地址

fork函数对应的内核处理过程sys_clone

  • fork、vfork和clone三个系统调用都可以创建一个新进程,都是通过调用do_fork来实现进程的创建
  • 创建新进程需要先复制一个PCB:task_struct
  • 再给新进程分配一个新的内核堆栈
ti = alloc_thread_info_node(tsk, node);
tsk->stack = ti;
setup_thread_stack(tsk, orig); //这里只是复制thread_info,而非复制内核堆栈
  • 再修改复制过来的进程数据,比如pid、进程链表等等,见copy_process内部
  • 从用户态的代码看fork();函数返回了两次,即在父子进程中各返回一次
*childregs = *current_pt_regs(); //复制内核堆栈
childregs->ax = 0; //为什么子进程的fork返回0,这里就是原因!

p->thread.sp = (unsigned long) childregs; //调度到子进程时的内核栈顶
p->thread.ip = (unsigned long) ret_from_fork; //调度到子进程时的第一条指令地址
  • do_fork完成了创建中的大部分工作,该函数调用copy_process()函数,然后让进程开始运行。copy_process()函数工作如下:

    1、调用dup_task_struct()为新进程创建一个内核栈、thread_info结构和task_struct,这些值与当前进程的值相同
    2、检查新创建这个子进程后,当前用户所拥有的进程数目是否超出了为其分配的资源限制
    3、子进程着手使自己与父进程区别开来。进程描述符内的许多成员被清0或设为初始值。
    4、子进程状态被设为TASK_UNINTERRUPTIBLE,以保证它不会投入运行
    5、copy_process()调用copy_flags()以更新task_struct的flags成员。表明进程是否拥有超级用户权限的PF_SUPERPRIV标志被清0。表明进程还没有调用exec()函数的PF_FORKNOEXEC标志被设置
    6、调用alloc_pid()为新进程分配一个有效的PID
    7、根据传递给clone()的参数标志,copy_process()拷贝或共享打开的文件、文件系统信息、信号处理函数、进程地址空间和命名空间等
    8、最后,copy_process()做扫尾工作并返回一个指向子进程的指针

gdb跟踪

验证fork功能:


在创建进程的关键函数上设置断点:

跟踪调试过程:

copy_thread函数为子进程准备了上下文堆栈信息,其工作流程如下:

获取子进程寄存器信息的存放位置
对子进程的thread.sp赋值,即子进程被调度运行时esp寄存器的值(内核栈顶)。
如果创建的是内核线程,则运行位置是ret_from_kernel_thread,将这段代码的地址赋给thread.ip,之后准备其他寄存器信息,退出
将父进程的寄存器信息复制给子进程。
子进程的eax寄存器值置0。
子进程从ret_from_fork开始执行,所以它的地址赋给thread.ip,也就是将来的eip寄存器。

继续调试:

遇到的问题

PID相关知识:
pid结构体:

struct pid {
    struct hlist_head tasks;        //指回 pid_link 的 node
    int nr;                       //PID
    struct hlist_node pid_chain;    //pid hash 散列表结点
};

pid_vnr:

 pid_t pid_vnr(struct pid*pid)
{
     return pid_nr_ns(pid,current->nsproxy->pid_ns); //current->nsproxy->pid_ns是当前pid_namespace
} 

获得 pid 实例之后,再根据 pid 中 numbers 数组里对应层级的 upid 信息,获得局部PID。

pid_t pid_nr_ns(struct pid *pid, struct pid_namespace *ns)
{
    struct upid *upid;
    pid_t nr = 0;

    if (pid && ns->level <= pid->level) {
        upid = &pid->numbers[ns->level];
        if (upid->ns == ns)
            nr = upid->nr;
    }
    return nr;
}
posted @ 2018-11-23 00:34  古厉  阅读(366)  评论(0)  编辑  收藏  举报