Linux Kernel Analysis Lab 3: Tracing and Analyzing the Linux Kernel Boot Process
Chen Chen + Original work; please credit the source when reposting: the "Linux Kernel Analysis" MOOC course
Setting Up the Experiment Environment
- Building MenuOS on your own Linux system
# Download the kernel source and build the kernel
cd ~/LinuxKernel/
wget https://www.kernel.org/pub/linux/kernel/v3.x/linux-3.18.6.tar.xz
xz -d linux-3.18.6.tar.xz
tar -xvf linux-3.18.6.tar
cd linux-3.18.6
make i386_defconfig
make # building typically takes a long time, from 20 minutes to several hours
# Build the root filesystem
cd ~/LinuxKernel/
mkdir rootfs
git clone https://github.com/mengning/menu.git # if GitHub is blocked, use the attached menu.zip
cd menu
gcc -o init linktable.c menu.c test.c -m32 -static -lpthread
cd ../rootfs
cp ../menu/init ./
find . | cpio -o -Hnewc |gzip -9 > ../rootfs.img
# Boot the MenuOS system
cd ~/LinuxKernel/
qemu -kernel linux-3.18.6/arch/x86/boot/bzImage -initrd rootfs.img # on some distributions the binary is named qemu-system-i386
- Reconfigure and rebuild Linux so that it carries debugging information
- On top of the existing configuration, run make menuconfig and enable the following option to build the kernel with debug info.
(If the build fails with: scripts/kconfig/lxdialog/dialog.h:38:20: fatal error: curses.h: No such file or directory, install the ncurses headers: sudo apt-get install libncurses5-dev)
Kernel hacking --->
    Compile-time checks and compiler options --->
        [*] Compile the kernel with debug info
- Rebuild with make (this takes a while)
- Tracing and debugging the kernel with gdb
qemu -kernel linux-3.18.6/arch/x86/boot/bzImage -initrd rootfs.img -s -S # notes on the -s and -S options:
# -S freeze CPU at startup (use 'c' to start execution)
# -s shorthand for -gdb tcp::1234; to use a port other than 1234, replace -s with -gdb tcp::xxxx
Open another shell window:
gdb
(gdb) file linux-3.18.6/vmlinux # load the symbol table in gdb before running target remote
(gdb) target remote:1234 # connect gdb to the gdbserver; then press c to let the Linux instance in qemu continue
(gdb) break start_kernel # the breakpoint can be set either before or after target remote
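From here only a few standard gdb commands are needed; the sketch below shows a typical session (output omitted; start_kernel and rest_init are real symbols in 3.18):
(gdb) c                # continue until the start_kernel breakpoint is hit
(gdb) bt               # inspect the call stack at the breakpoint
(gdb) break rest_init  # stop again where process 1 and process 2 are spawned
(gdb) c                # continue to rest_init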
init is the first user-space process, with PID 1.
The start_kernel function in init/main.c is the starting point of kernel boot: whichever part of the kernel you analyze, you will run into start_kernel, because most subsystems are initialized from here.
The global variable init_task plays the role of the hand-crafted PCB from the earlier labs; it is process 0, which eventually becomes the idle process.
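While stopped inside start_kernel you can verify this directly; the fields below exist in the 3.18 task_struct, and comm is set to "swapper" by the INIT_TASK macro:
(gdb) p init_task.pid    # prints 0
(gdb) p init_task.comm   # prints "swapper"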
Experiment Process
1. Screenshots of the experiment process
2. Detailed code analysis
// start_kernel contains a large amount of subsystem-initialization code; only part of it is excerpted here
asmlinkage __visible void __init start_kernel(void)
{
char *command_line;
char *after_dashes;
/*
* Need to run as early as possible, to initialize the
* lockdep hash:
*/
lockdep_init();
// init_task: the initial task set up during kernel initialization; it eventually becomes the idle process
set_task_stack_end_magic(&init_task);
smp_setup_processor_id();
debug_objects_early_init();
/* ... many more initialization calls omitted ... */
// rest_init: another important function in the kernel initialization path
rest_init();
}
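One detail worth calling out: set_task_stack_end_magic writes a sentinel value at the end of process 0's kernel stack so that stack overflows can be detected later. In 3.18 it is just a few lines in kernel/fork.c; the excerpt below is quoted from memory, so verify it against your tree:
void set_task_stack_end_magic(struct task_struct *tsk)
{
	unsigned long *stackend;

	stackend = end_of_stack(tsk);
	*stackend = STACK_END_MAGIC;	/* for overflow detection */
}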
// init_task.c
/* Initial task structure */
struct task_struct init_task = INIT_TASK(init_task);
// init_task is statically initialized via the INIT_TASK macro
#define INIT_TASK(tsk) \
{ \
.state = 0, \
.stack = &init_thread_info, \
.usage = ATOMIC_INIT(2), \
.flags = PF_KTHREAD, \
.prio = MAX_PRIO-20, \
.static_prio = MAX_PRIO-20, \
.normal_prio = MAX_PRIO-20, \
.policy = SCHED_NORMAL, \
.cpus_allowed = CPU_MASK_ALL, \
.nr_cpus_allowed= NR_CPUS, \
.mm = NULL, \
.active_mm = &init_mm, \
.se = { \
.group_node = LIST_HEAD_INIT(tsk.se.group_node), \
}, \
.rt = { \
.run_list = LIST_HEAD_INIT(tsk.rt.run_list), \
.time_slice = RR_TIMESLICE, \
}, \
.tasks = LIST_HEAD_INIT(tsk.tasks), \
INIT_PUSHABLE_TASKS(tsk) \
INIT_CGROUP_SCHED(tsk) \
.ptraced = LIST_HEAD_INIT(tsk.ptraced), \
.ptrace_entry = LIST_HEAD_INIT(tsk.ptrace_entry), \
.real_parent = &tsk, \
.parent = &tsk, \
.children = LIST_HEAD_INIT(tsk.children), \
.sibling = LIST_HEAD_INIT(tsk.sibling), \
.group_leader = &tsk, \
RCU_POINTER_INITIALIZER(real_cred, &init_cred), \
RCU_POINTER_INITIALIZER(cred, &init_cred), \
.comm = INIT_TASK_COMM, \
.thread = INIT_THREAD, \
.fs = &init_fs, \
.files = &init_files, \
.signal = &init_signals, \
.sighand = &init_sighand, \
.nsproxy = &init_nsproxy, \
.pending = { \
.list = LIST_HEAD_INIT(tsk.pending.list), \
.signal = {{0}}}, \
.blocked = {{0}}, \
.alloc_lock = __SPIN_LOCK_UNLOCKED(tsk.alloc_lock), \
.journal_info = NULL, \
.cpu_timers = INIT_CPU_TIMERS(tsk.cpu_timers), \
.pi_lock = __RAW_SPIN_LOCK_UNLOCKED(tsk.pi_lock), \
.timer_slack_ns = 50000, /* 50 usec default slack */ \
.pids = { \
[PIDTYPE_PID] = INIT_PID_LINK(PIDTYPE_PID), \
[PIDTYPE_PGID] = INIT_PID_LINK(PIDTYPE_PGID), \
[PIDTYPE_SID] = INIT_PID_LINK(PIDTYPE_SID), \
}, \
.thread_group = LIST_HEAD_INIT(tsk.thread_group), \
.thread_node = LIST_HEAD_INIT(init_signals.thread_head), \
INIT_IDS \
INIT_PERF_EVENTS(tsk) \
INIT_TRACE_IRQFLAGS \
INIT_LOCKDEP \
INIT_FTRACE_GRAPH \
INIT_TRACE_RECURSION \
INIT_TASK_RCU_PREEMPT(tsk) \
INIT_TASK_RCU_TASKS(tsk) \
INIT_CPUSET_SEQ(tsk) \
INIT_RT_MUTEXES(tsk) \
INIT_VTIME(tsk) \
}
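Note the .stack field above: process 0's kernel stack is not allocated at run time but is the statically defined init_thread_union, which shares storage with init_thread_info. Its definition in 3.18's init/init_task.c is roughly:
/*
 * Initial thread structure. Alignment of this is handled by a special
 * linker section (.data..init_task).
 */
union thread_union init_thread_union __init_task_data =
	{ INIT_THREAD_INFO(init_task) };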
static noinline void __init_refok rest_init(void)
{
int pid;
rcu_scheduler_starting();
/*
* We need to spawn init first so that it obtains pid 1, however
* the init task will end up wanting to create kthreads, which, if
* we schedule it before we create kthreadd, will OOPS.
*/
// kernel_thread creates the first user-space process, which by default runs the init program in the root directory
kernel_thread(kernel_init, NULL, CLONE_FS);
numa_default_policy();
// create kthreadd, the kernel thread that manages and services all other kernel threads: process 2
pid = kernel_thread(kthreadd, NULL, CLONE_FS | CLONE_FILES);
rcu_read_lock();
kthreadd_task = find_task_by_pid_ns(pid, &init_pid_ns);
rcu_read_unlock();
complete(&kthreadd_done);
/*
* The boot idle thread must execute schedule()
* at least once to get things moving:
*/
init_idle_bootup_task(current);
schedule_preempt_disabled();
/* Call into cpu_idle with preempt disabled */
cpu_startup_entry(CPUHP_ONLINE);
}
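Both children are spawned through kernel_thread, which is only a thin wrapper around do_fork, the same machinery behind the fork system call; in 3.18's kernel/fork.c it looks roughly like this (hedged excerpt, check your tree):
pid_t kernel_thread(int (*fn)(void *), void *arg, unsigned long flags)
{
	return do_fork(flags | CLONE_VM | CLONE_UNTRACED, (unsigned long)fn,
		       (unsigned long)arg, NULL, NULL);
}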
static int __ref kernel_init(void *unused)
{
int ret;
kernel_init_freeable();
/* need to finish all async __init code before freeing the memory */
async_synchronize_full();
free_initmem();
mark_rodata_ro();
system_state = SYSTEM_RUNNING;
numa_default_policy();
flush_delayed_fput();
if (ramdisk_execute_command) {
// the key call here is run_init_process
ret = run_init_process(ramdisk_execute_command);
if (!ret)
return 0;
pr_err("Failed to execute %s (error %d)\n",
ramdisk_execute_command, ret);
}
/*
* We try each of these until one succeeds.
*
* The Bourne shell can be used instead of init if we are
* trying to recover a really broken machine.
*/
if (execute_command) {
ret = run_init_process(execute_command);
if (!ret)
return 0;
pr_err("Failed to execute %s (error %d). Attempting defaults...\n",
execute_command, ret);
}
if (!try_to_run_init_process("/sbin/init") ||
!try_to_run_init_process("/etc/init") ||
!try_to_run_init_process("/bin/init") ||
!try_to_run_init_process("/bin/sh"))
return 0;
panic("No working init found. Try passing init= option to kernel. "
"See Linux Documentation/init.txt for guidance.");
}
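run_init_process is where the kernel thread kernel_init turns into a real user-space process: it execs the given program, and if the exec succeeds it never returns to kernel_init's code. In 3.18's init/main.c it is essentially the following (hedged excerpt):
static int run_init_process(const char *init_filename)
{
	argv_init[0] = init_filename;
	return do_execve(getname_kernel(init_filename),
		(const char __user *const __user *)argv_init,
		(const char __user *const __user *)envp_init);
}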
int kthreadd(void *unused)
{
struct task_struct *tsk = current;
/* Setup a clean context for our children to inherit. */
set_task_comm(tsk, "kthreadd");
ignore_signals(tsk);
set_cpus_allowed_ptr(tsk, cpu_all_mask);
set_mems_allowed(node_states[N_MEMORY]);
current->flags |= PF_NOFREEZE;
for (;;) {
set_current_state(TASK_INTERRUPTIBLE);
if (list_empty(&kthread_create_list))
schedule();
__set_current_state(TASK_RUNNING);
spin_lock(&kthread_create_lock);
while (!list_empty(&kthread_create_list)) {
struct kthread_create_info *create;
create = list_entry(kthread_create_list.next,
struct kthread_create_info, list);
list_del_init(&create->list);
spin_unlock(&kthread_create_lock);
create_kthread(create);
spin_lock(&kthread_create_lock);
}
spin_unlock(&kthread_create_lock);
}
return 0;
}
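Every later kthread_create call simply queues a kthread_create_info node on kthread_create_list and wakes kthreadd, which is why all kernel threads descend from process 2. A minimal usage sketch (my_worker is a hypothetical thread function invented for illustration):
#include <linux/kthread.h>
#include <linux/sched.h>

static int my_worker(void *data)	/* hypothetical worker, does no real work */
{
	while (!kthread_should_stop()) {
		/* real code would do some work here */
		set_current_state(TASK_INTERRUPTIBLE);
		schedule();
	}
	return 0;
}

/* somewhere in module init: create and start the thread via kthreadd */
struct task_struct *t = kthread_run(my_worker, NULL, "my_worker");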
// kernel/sched/idle.c
void cpu_startup_entry(enum cpuhp_state state)
{
/*
* This #ifdef needs to die, but it's too late in the cycle to
* make this generic (arm and sh have never invoked the canary
* init for the non boot cpus!). Will be fixed in 3.11
*/
#ifdef CONFIG_X86
/*
* If we're the non-boot CPU, nothing set the stack canary up
* for us. The boot CPU already has it initialized but no harm
* in doing it again. This is a good place for updating it, as
* we wont ever return from this function (so the invalid
* canaries already on the stack wont ever trigger).
*/
boot_init_stack_canary();
#endif
arch_cpu_idle_prepare();
// when no other process is runnable, the scheduler switches to the idle process, which loops here
cpu_idle_loop();
}
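cpu_idle_loop never returns; stripped of housekeeping it is an endless loop of roughly the following shape (simplified from 3.18's kernel/sched/idle.c, not a verbatim copy):
static void cpu_idle_loop(void)
{
	while (1) {
		while (!need_resched()) {
			/* ... */
			arch_cpu_idle();	/* halt the CPU until the next interrupt */
		}
		/* something became runnable: let the scheduler pick it */
		schedule_preempt_disabled();
	}
}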
Summary
Setting aside the initialization of the other kernel subsystems and looking only at process initialization, the outline is roughly this: init_task is process 0 and exists from the moment start_kernel begins to run; in rest_init, process 0 then creates process 1 (kernel_init) and process 2 (kthreadd).
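The resulting process hierarchy right after boot can be pictured as:
init_task (PID 0) -- keeps running as the idle loop via cpu_startup_entry
 |-- kernel_init (PID 1) -- execs the user-space init (here, the init in rootfs.img)
 `-- kthreadd    (PID 2) -- ancestor of all kernel threads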
For more detail on these three processes, see the references.
Reflections
By following the instructor's videos and the reference material, I was able to debug and read through the process-related part of kernel initialization and grasp the overall path, but quite a few details of how the three processes are actually created remain unclear to me and call for further study.
