QEMU的初始化流程概览

场景

QEMU启动一个RISC-V 64位机器的初始化流程

参考链接

https://gevico.github.io/learning-qemu-docs/ch2/sec1/qemu-init/
另外,欢迎参加QEMU opencamp https://opencamp.cn/qemu/camp/2025, 训练营在2025 年 10 月 12 日至 11 月 23 日开展,好像可以中途加入

环境

  1. riscv64工具链
    1. 如果是ubuntu可以直接下载 https://github.com/riscv-collab/riscv-gnu-toolchain 这个仓库的release
    2. 如果没有预构建的产物,可以使用crosstool-ng构建riscv64的工具链。crosstool-ng仓库地址 https://github.com/crosstool-ng/crosstool-ng
    3. 或者直接从 https://github.com/riscv-collab/riscv-gnu-toolchain 源码构建,按着README来操作就可以
  2. qemu-system-riscv64,版本选择10.x
    1. 下载QEMU源码,然后使用下面的参数进行配置
      ./configure --target-list=riscv64-softmmu \
           --extra-cflags="-O0 -g3" \
           --cross-prefix-riscv64=riscv64-unknown-elf- \
           --enable-rust --enable-debug
      
      这里只设置riscv64-softmmu,可以加快编译
      设置完毕之后make -j$(nproc)编译,产物在build文件夹
      注意,需要先配置好riscv64-unknown-elf工具链再配置QEMU

类型初始化和实例初始化

QEMU对地址空间和设备等都有面向对象的抽象,初始化的时候会先初始化类型,然后初始化实例。QEMU对于不同的machine有不同的定义,如果使用-M virt启动,那么会找到virt的machine定义,然后执行virt_machine_class_init初始化类,执行virt_machine_instance_init初始化实例。

virt机器的类型定义是一个TypeInfo,是一个全局变量。

// hw/riscv/virt.c
/*
 * Type description for the "virt" machine. It names the type, derives it
 * from TYPE_MACHINE, and supplies the class-level and instance-level
 * initializers. Instances are sized as RISCVVirtState.
 */
static const TypeInfo virt_machine_typeinfo = {
    .name       = MACHINE_TYPE_NAME("virt"),
    .parent     = TYPE_MACHINE,
    /* Class initializer; this is where mc->init = virt_machine_init is set. */
    .class_init = virt_machine_class_init,
    /* Per-object initializer, reached through object_new_with_class(). */
    .instance_init = virt_machine_instance_init,
    .instance_size = sizeof(RISCVVirtState),
    /* Interface list; the empty entry terminates the array. */
    .interfaces = (const InterfaceInfo[]) {
         { TYPE_HOTPLUG_HANDLER },
         { }
    },
};

依次查看virt_machine_class_init和virt_machine_instance_init的调用链

virt_machine_class_init

virt_machine_class_init的完整调用链如下:

(gdb) bt
#0  virt_machine_class_init (oc=0x5555576608f0, data=0x0) at ../hw/riscv/virt.c:1916
#1  0x0000555555c091d2 in type_initialize (ti=0x55555745a3a0) at ../qom/object.c:417
#2  0x0000555555c0ac9a in object_class_foreach_tramp (key=0x55555745a520, value=0x55555745a3a0, opaque=0x7fffffffcde0) at ../qom/object.c:1110
#3  0x00007ffff6e7fc5b in g_hash_table_foreach (hash_table=0x5555574158d0 = {...}, func=0x555555c0ac62 <object_class_foreach_tramp>, user_data=0x7fffffffcde0) at ../glib/ghash.c:2128
#4  0x0000555555c0ad91 in object_class_foreach (fn=0x555555c0af2d <object_class_get_list_tramp>, implements_type=0x5555564029b7 "machine", include_abstract=false, opaque=0x7fffffffce30) at ../qom/object.c:1132
#5  0x0000555555c0afb0 in object_class_get_list (implements_type=0x5555564029b7 "machine", include_abstract=false) at ../qom/object.c:1189
#6  0x000055555586ed8b in select_machine (qdict=0x555557469d10, errp=0x7fffffffce80) at ../system/vl.c:1675
#7  0x000055555587033f in qemu_create_machine (qdict=0x555557469d10) at ../system/vl.c:2187
#8  0x00005555558751e8 in qemu_init (argc=6, argv=0x7fffffffd218) at ../system/vl.c:3759
#9  0x0000555555d64296 in main (argc=6, argv=0x7fffffffd218) at ../system/main.c:71

关键调用流程

main->qemu_init->virt_machine_class_init

virt_machine_class_init函数里面设置了一个关键的初始化函数

// hw/riscv/virt.c
static void virt_machine_class_init(ObjectClass *oc, const void *data)
{
    MachineClass *mc = MACHINE_CLASS(oc);
    HotplugHandlerClass *hc = HOTPLUG_HANDLER_CLASS(oc);

    mc->desc = "RISC-V VirtIO board";
    // 关键初始化函数
    mc->init = virt_machine_init;
    mc->max_cpus = VIRT_CPUS_MAX;
    mc->default_cpu_type = TYPE_RISCV_CPU_BASE;
    mc->block_default_type = IF_VIRTIO;
    mc->no_cdrom = 1;
    mc->pci_allow_0_address = true;
    mc->possible_cpu_arch_ids = riscv_numa_possible_cpu_arch_ids;
    mc->cpu_index_to_instance_props = riscv_numa_cpu_index_to_props;
    mc->get_default_cpu_node_id = riscv_numa_get_default_cpu_node_id;

在实例创建之后,后续的完整初始化会通过virt_machine_init来完成

virt_machine_instance_init

在类型创建完毕之后,会创建实例
完整调用链如下:

#0  virt_machine_instance_init (obj=0x55555766c2c0) at ../hw/riscv/virt.c:1751
#1  0x0000555555c09244 in object_init_with_type (obj=0x55555766c2c0, ti=0x55555745a3a0) at ../qom/object.c:428
#2  0x0000555555c09837 in object_initialize_with_type (obj=0x55555766c2c0, size=4040, type=0x55555745a3a0) at ../qom/object.c:570
#3  0x0000555555c0a064 in object_new_with_type (type=0x55555745a3a0) at ../qom/object.c:774
#4  0x0000555555c0a09c in object_new_with_class (klass=0x5555576608f0) at ../qom/object.c:782
#5  0x0000555555870362 in qemu_create_machine (qdict=0x555557469d10) at ../system/vl.c:2190
#6  0x00005555558751e8 in qemu_init (argc=6, argv=0x7fffffffd218) at ../system/vl.c:3759
#7  0x0000555555d64296 in main (argc=6, argv=0x7fffffffd218) at ../system/main.c:71

关键调用路径和virt_machine_class_init类似

main->qemu_init->virt_machine_instance_init

加载OpenSBI BIOS

如果QEMU启动参数不指定-bios或者指定-bios default,那么默认使用的是QEMU社区提供的一个OpenSBI BIOS。在源码树下面有这个bin文件,配置并编译qemu之后会复制一份到build文件夹里面

learning-qemu-2025-yexuanyang (main) » find ./* -name 'opensbi-*.bin'
./build/qemu-bundle/usr/local/share/qemu/opensbi-riscv32-generic-fw_dynamic.bin
./build/qemu-bundle/usr/local/share/qemu/opensbi-riscv64-generic-fw_dynamic.bin
./pc-bios/opensbi-riscv32-generic-fw_dynamic.bin
./pc-bios/opensbi-riscv64-generic-fw_dynamic.bin

首先断点到之前发现的关键初始化函数virt_machine_init,查看调用链。

#0  virt_machine_init (machine=0x55555766c2c0) at ../hw/riscv/virt.c:1529
#1  0x000055555563d2d1 in machine_run_board_init (machine=0x55555766c2c0, mem_path=0x0, errp=0x7fffffffce60) at ../hw/core/machine.c:1694
#2  0x0000555555871bd2 in qemu_init_board () at ../system/vl.c:2710
#3  0x0000555555871f65 in qmp_x_exit_preconfig (errp=0x555556abdb60 <error_fatal>) at ../system/vl.c:2804
#4  0x00005555558753a3 in qemu_init (argc=6, argv=0x7fffffffd218) at ../system/vl.c:3840
#5  0x0000555555d64296 in main (argc=6, argv=0x7fffffffd218) at ../system/main.c:71

关键路径如下

main->qemu_init->qemu_init_board->virt_machine_init

函数virt_machine_init在QEMU初始化板卡(board)相关配置的阶段执行

virt_machine_init函数如下:

// hw/riscv/virt.c
static void virt_machine_init(MachineState *machine)
{
    RISCVVirtState *s = RISCV_VIRT_MACHINE(machine);
    MemoryRegion *system_memory = get_system_memory();
    MemoryRegion *mask_rom = g_new(MemoryRegion, 1);
    DeviceState *mmio_irqchip, *virtio_irqchip, *pcie_irqchip;
    int i, base_hartid, hart_count;
    int socket_count = riscv_socket_count(machine);

    s->memmap = virt_memmap;
    ...
    /* Initialize sockets */
    mmio_irqchip = virtio_irqchip = pcie_irqchip = NULL;
    for (i = 0; i < socket_count; i++) {
        g_autofree char *soc_name = g_strdup_printf("soc%d", i);

        if (!riscv_socket_check_hartids(machine, i)) {
            error_report("discontinuous hartids in socket%d", i);
            exit(1);
        }
        ...
    }
    ...
     /* register system main memory (actual RAM) */
    memory_region_add_subregion(system_memory, s->memmap[VIRT_DRAM].base,
                                machine->ram);

    /* boot rom */
    memory_region_init_rom(mask_rom, NULL, "riscv_virt_board.mrom",
                           s->memmap[VIRT_MROM].size, &error_fatal);
    memory_region_add_subregion(system_memory, s->memmap[VIRT_MROM].base,
                                mask_rom);
	
	...
	
	serial_mm_init(system_memory, s->memmap[VIRT_UART0].base,
        0, qdev_get_gpio_in(mmio_irqchip, UART0_IRQ), 399193,
        serial_hd(0), DEVICE_LITTLE_ENDIAN);
        
    ...
    
    s->machine_done.notify = virt_machine_done;
    qemu_add_machine_init_done_notifier(&s->machine_done);
}

我们只关注没有被...省略的代码,首先将传入的Object MachineState转换成RISCVVirtState,设置了这个对象的memmap,之后初始化CPU sockets,然后设置memory region,最后初始化一下serial_mm,允许串口输出BIOS的一些信息。函数最后注册了一个回调函数virt_machine_done,这个函数在虚拟机初始化完毕之后执行。

我们具体看看每一步的意义。

  1. 设置的memmap是用来标记一些内存段的起始地址和大小,比如VIRT_MROM内存段的起始地址是0x1000,大小是0xf000,在系统启动执行cpu_reset时会用到这个内存段的基址和大小。
  2. 初始化CPU sockets主要是可以按照簇(cluster)来创建多组 CPU 核心
  3. 按照地址空间初始化各种设备,我们可以关注一下boot rom的初始化,因为OpenSBI作为最优先运行的loader,大概率存储在ROM里面。在boot rom初始化里使用了s->memmap[VIRT_MROM].base开始的s->memmap[VIRT_MROM].size大小的一段区域内容。
  4. 初始化serial_mm是为了在运行BIOS的时候有串口可以输出信息
  5. 注册回调函数用于在机器初始化完毕之后调用virt_machine_done

基本可以猜测是virt_machine_done来执行OpenSBI BIOS,接下来查看这个函数做了什么

virt_machine_done

static void virt_machine_done(Notifier *notifier, void *data)
{
    RISCVVirtState *s = container_of(notifier, RISCVVirtState,
                                     machine_done);
    MachineState *machine = MACHINE(s);
    hwaddr start_addr = s->memmap[VIRT_DRAM].base;
    target_ulong firmware_end_addr, kernel_start_addr;
    const char *firmware_name = riscv_default_firmware_name(&s->soc[0]);
    uint64_t fdt_load_addr;
    uint64_t kernel_entry = 0;
    BlockBackend *pflash_blk0;
    RISCVBootInfo boot_info;
    /*
     * An user provided dtb must include everything, including
     * dynamic sysbus devices. Our FDT needs to be finalized.
     */
    if (machine->dtb == NULL) {
        finalize_fdt(s);
    }
    ...
	firmware_end_addr = riscv_find_and_load_firmware(machine, firmware_name,
                                                     &start_addr, NULL);
    ...
    fdt_load_addr = riscv_compute_fdt_addr(s->memmap[VIRT_DRAM].base,
                                           s->memmap[VIRT_DRAM].size,
                                           machine, &boot_info);
    riscv_load_fdt(fdt_load_addr, machine->fdt);

    /* load the reset vector */
    riscv_setup_rom_reset_vec(machine, &s->soc[0], start_addr,
                              s->memmap[VIRT_MROM].base,
                              s->memmap[VIRT_MROM].size, kernel_entry,
                              fdt_load_addr);
    ...
}

这个函数关键做了下面这些事

  1. 准备设备树:如果用户没有提供dtb(machine->dtb == NULL),就调用finalize_fdt完成QEMU自己生成的FDT,之后通过riscv_load_fdt把FDT加载到fdt_load_addr
  2. 使用riscv_find_and_load_firmware加载OpenSBI BIOS固件,这里加载的位置是s->memmap[VIRT_DRAM].base
  3. 使用riscv_setup_rom_reset_vec在s->memmap[VIRT_MROM].base处写入reset vector(复位向量)代码,CPU复位之后从这里开始执行

在之前virt_machine_init函数里面可以看到s->memmap被设置为了virt_memmap

/*
 * Physical address map of the virt machine, indexed by the VIRT_* region
 * identifiers. Each entry is { base address, size in bytes }.
 * virt_machine_init() stores this table in s->memmap and reads the
 * bases/sizes from it when mapping regions.
 */
static const MemMapEntry virt_memmap[] = {
    [VIRT_DEBUG] =        {        0x0,         0x100 },
    /* Boot ROM region: holds the reset vector the hart executes first. */
    [VIRT_MROM] =         {     0x1000,        0xf000 },
    [VIRT_TEST] =         {   0x100000,        0x1000 },
    [VIRT_RTC] =          {   0x101000,        0x1000 },
    [VIRT_CLINT] =        {  0x2000000,       0x10000 },
    [VIRT_ACLINT_SSWI] =  {  0x2F00000,        0x4000 },
    [VIRT_PCIE_PIO] =     {  0x3000000,       0x10000 },
    [VIRT_IOMMU_SYS] =    {  0x3010000,        0x1000 },
    [VIRT_PLATFORM_BUS] = {  0x4000000,     0x2000000 },
    /*
     * NOTE(review): VIRT_PLIC and VIRT_APLIC_M share the same base address;
     * presumably only one of the two is instantiated at a time - confirm.
     */
    [VIRT_PLIC] =         {  0xc000000, VIRT_PLIC_SIZE(VIRT_CPUS_MAX * 2) },
    [VIRT_APLIC_M] =      {  0xc000000, APLIC_SIZE(VIRT_CPUS_MAX) },
    [VIRT_APLIC_S] =      {  0xd000000, APLIC_SIZE(VIRT_CPUS_MAX) },
    [VIRT_UART0] =        { 0x10000000,         0x100 },
    [VIRT_VIRTIO] =       { 0x10001000,        0x1000 },
    [VIRT_FW_CFG] =       { 0x10100000,          0x18 },
    [VIRT_FLASH] =        { 0x20000000,     0x4000000 },
    [VIRT_IMSIC_M] =      { 0x24000000, VIRT_IMSIC_MAX_SIZE },
    [VIRT_IMSIC_S] =      { 0x28000000, VIRT_IMSIC_MAX_SIZE },
    [VIRT_PCIE_ECAM] =    { 0x30000000,    0x10000000 },
    [VIRT_PCIE_MMIO] =    { 0x40000000,    0x40000000 },
    /*
     * Start of main RAM, and the default firmware load address used by
     * virt_machine_done(). The size is 0 in this table; presumably the
     * real size comes from the configured RAM size elsewhere - confirm.
     */
    [VIRT_DRAM] =         { 0x80000000,           0x0 },
};

这就是为什么qemu-system-riscv64的第一条指令地址为0x1000,同时可以看到OpenSBI BIOS被加载到了0x80000000

我们继续看一下riscv_setup_rom_reset_vec函数做了什么

riscv_setup_rom_reset_vec

/*
 * Build the boot ROM reset vector and register it as a ROM blob at rom_base.
 *
 * The vector is ten 32-bit words: six instruction words followed by two
 * 64-bit data slots (start address, then FDT address). When executed it
 * leaves the hart id in a0, the FDT address in a1, the address just past
 * the vector (the fw_dyn label) in a2, and then jumps to start_addr.
 *
 * @machine:       machine state, forwarded to riscv_rom_copy_firmware_info()
 * @harts:         hart array; consulted for 32-bit vs 64-bit and for Zicsr
 * @start_addr:    address the reset vector finally jumps to
 * @rom_base:      address at which the reset vector blob is placed
 * @rom_size:      ROM size, forwarded to riscv_rom_copy_firmware_info()
 * @kernel_entry:  forwarded to riscv_rom_copy_firmware_info()
 * @fdt_load_addr: address of the device tree, loaded into a1 by the vector
 */
void riscv_setup_rom_reset_vec(MachineState *machine, RISCVHartArrayState *harts,
                               hwaddr start_addr,
                               hwaddr rom_base, hwaddr rom_size,
                               uint64_t kernel_entry,
                               uint64_t fdt_load_addr)
{
    int i;
    uint32_t start_addr_hi32 = 0x00000000;
    uint32_t fdt_load_addr_hi32 = 0x00000000;

    /* The upper halves stay zero on 32-bit harts. */
    if (!riscv_is_32bit(harts)) {
        start_addr_hi32 = start_addr >> 32;
        fdt_load_addr_hi32 = fdt_load_addr >> 32;
    }
    /* reset vector */
    /*
     * Words 3 and 4 are patched below with loads of the matching width.
     * The data slots sit at byte offsets 24 (start) and 32 (fdt_laddr) from
     * the beginning of the vector, which is what the loads address via t0.
     * Storing start_addr / fdt_load_addr into uint32_t elements keeps only
     * their low 32 bits; the high halves go into the following words.
     */
    uint32_t reset_vec[10] = {
        0x00000297,                  /* 1:  auipc  t0, %pcrel_hi(fw_dyn) */
        0x02828613,                  /*     addi   a2, t0, %pcrel_lo(1b) */
        0xf1402573,                  /*     csrr   a0, mhartid  */
        0,
        0,
        0x00028067,                  /*     jr     t0 */
        start_addr,                  /* start: .dword */
        start_addr_hi32,
        fdt_load_addr,               /* fdt_laddr: .dword */
        fdt_load_addr_hi32,
                                     /* fw_dyn: */
    };
    /* Pick word-sized or doubleword-sized loads to match the hart width. */
    if (riscv_is_32bit(harts)) {
        reset_vec[3] = 0x0202a583;   /*     lw     a1, 32(t0) */
        reset_vec[4] = 0x0182a283;   /*     lw     t0, 24(t0) */
    } else {
        reset_vec[3] = 0x0202b583;   /*     ld     a1, 32(t0) */
        reset_vec[4] = 0x0182b283;   /*     ld     t0, 24(t0) */
    }
    if (!harts->harts[0].cfg.ext_zicsr) {
        /*
         * The Zicsr extension has been disabled, so let's ensure we don't
         * run the CSR instruction. Let's fill the address with a non
         * compressed nop.
         */
        reset_vec[2] = 0x00000013;   /*     addi   x0, x0, 0 */
    }

    /* copy in the reset vector in little_endian byte order */
    for (i = 0; i < ARRAY_SIZE(reset_vec); i++) {
        reset_vec[i] = cpu_to_le32(reset_vec[i]);
    }
    rom_add_blob_fixed_as("mrom.reset", reset_vec, sizeof(reset_vec),
                          rom_base, &address_space_memory);
    /*
     * NOTE(review): sizeof(reset_vec) is passed as an offset, so the
     * firmware info presumably lands right after the vector (the fw_dyn
     * label that a2 points at) - confirm against the helper.
     */
    riscv_rom_copy_firmware_info(machine, harts,
                                 rom_base, rom_size,
                                 sizeof(reset_vec),
                                 kernel_entry);
}

大概率是QEMU内置了一段汇编代码,执行之后会跳到实际的入口开始启动系统

我们在启动QEMU的时候调试一下,一步一步看看是不是和我们想的一样。

启动QEMU

$ ./build/qemu-system-riscv64 -M virt -device edu,id=edu1 -nographic -s -S 

然后用gdb连接,用si单指令运行

$ riscv64-unknown-elf-gdb
(gdb) target remote localhost:1234
(gdb) display /i $pc
(gdb) si

结果如下:

(gdb) target remote localhost:1234
Remote debugging using localhost:1234
warning: No executable has been specified and target does not support
determining executable automatically.  Try using the "file" command.
0x0000000000001000 in ?? ()
(gdb) display /i $pc
1: x/i $pc
=> 0x1000:      auipc   t0,0x0
(gdb) si
0x0000000000001004 in ?? ()
1: x/i $pc
=> 0x1004:      addi    a2,t0,40
(gdb) 
0x0000000000001008 in ?? ()
1: x/i $pc
=> 0x1008:      csrr    a0,mhartid
(gdb) 
0x000000000000100c in ?? ()
1: x/i $pc
=> 0x100c:      ld      a1,32(t0)
(gdb) 
0x0000000000001010 in ?? ()
1: x/i $pc
=> 0x1010:      ld      t0,24(t0)
(gdb) 
0x0000000000001014 in ?? ()
1: x/i $pc
=> 0x1014:      jr      t0
(gdb) 
0x0000000080000000 in ?? ()
1: x/i $pc
=> 0x80000000:  add     s0,a0,zero
(gdb) 

可以看到和我们想的是完全一致的,运行了一小段汇编之后,从真正的位置0x80000000开始启动系统,这个0x80000000就是加载的firmware,也就是OpenSBI BIOS。

vCPU初始化之后运行的第一条指令地址

对于每一个单独的vCPU,复位(reset)时执行的函数是riscv_cpu_reset_hold

static void riscv_cpu_reset_hold(Object *obj, ResetType type)
{
#ifndef CONFIG_USER_ONLY
    uint8_t iprio;
    int i, irq, rdzero;
#endif
    CPUState *cs = CPU(obj);
    RISCVCPU *cpu = RISCV_CPU(cs);
    RISCVCPUClass *mcc = RISCV_CPU_GET_CLASS(obj);
    CPURISCVState *env = &cpu->env;
	...
	env->pc = env->resetvec;
	...
}

这里将env->pc设置为env->resetvec,而env->resetvec的默认值是0x1000(DEFAULT_RSTVEC),可以在下面这个riscv_cpu_properties数组里面看到这个属性设置

/*
 * Property table for RISCVCPU. Entries are written either with a
 * DEFINE_PROP_* macro (property name, owning struct, backing field,
 * default value) or as an explicit { .name, .info } pair that refers to a
 * prop_* descriptor defined elsewhere.
 */
static const Property riscv_cpu_properties[] = {
    DEFINE_PROP_BOOL("debug", RISCVCPU, cfg.debug, true),

    {.name = "pmu-mask", .info = &prop_pmu_mask},
    {.name = "pmu-num", .info = &prop_pmu_num}, /* Deprecated */

    {.name = "mmu", .info = &prop_mmu},
    {.name = "pmp", .info = &prop_pmp},
    {.name = "num-pmp-regions", .info = &prop_num_pmp_regions},

    {.name = "priv_spec", .info = &prop_priv_spec},
    {.name = "vext_spec", .info = &prop_vext_spec},

    {.name = "vlen", .info = &prop_vlen},
    {.name = "elen", .info = &prop_elen},

    {.name = "cbom_blocksize", .info = &prop_cbom_blksize},
    {.name = "cbop_blocksize", .info = &prop_cbop_blksize},
    {.name = "cboz_blocksize", .info = &prop_cboz_blksize},

    {.name = "mvendorid", .info = &prop_mvendorid},
    {.name = "mimpid", .info = &prop_mimpid},
    {.name = "marchid", .info = &prop_marchid},

#ifndef CONFIG_USER_ONLY
    /*
     * System-emulation-only vectors. "resetvec" backs env.resetvec, which
     * riscv_cpu_reset_hold() copies into env->pc; it defaults to
     * DEFAULT_RSTVEC.
     */
    DEFINE_PROP_UINT64("resetvec", RISCVCPU, env.resetvec, DEFAULT_RSTVEC),
    DEFINE_PROP_UINT64("rnmi-interrupt-vector", RISCVCPU, env.rnmi_irqvec,
                       DEFAULT_RNMI_IRQVEC),
    DEFINE_PROP_UINT64("rnmi-exception-vector", RISCVCPU, env.rnmi_excpvec,
                       DEFAULT_RNMI_EXCPVEC),
#endif

    DEFINE_PROP_BOOL("short-isa-string", RISCVCPU, cfg.short_isa_string, false),

    DEFINE_PROP_BOOL("rvv_ta_all_1s", RISCVCPU, cfg.rvv_ta_all_1s, false),
    DEFINE_PROP_BOOL("rvv_ma_all_1s", RISCVCPU, cfg.rvv_ma_all_1s, false),
    DEFINE_PROP_BOOL("rvv_vl_half_avl", RISCVCPU, cfg.rvv_vl_half_avl, false),
    DEFINE_PROP_BOOL("rvv_vsetvl_x0_vill", RISCVCPU, cfg.rvv_vsetvl_x0_vill, false),

    /*
     * write_misa() is marked as experimental for now so mark
     * it with -x and default to 'false'.
     */
    DEFINE_PROP_BOOL("x-misa-w", RISCVCPU, cfg.misa_w, false),
};
posted @ 2025-10-22 11:59  yexuanyang  阅读(6)  评论(0)    收藏  举报