OpenSBI初始化分析
OpenSBI初始化
启动流程:主要涉及三个阶段
- .S汇编阶段,主要是Opensbi的底层初始化阶段,为C准备执行环境;
- 设备初始化阶段,此阶段正式进入C环境;
- 跳转阶段,跳转到二级 Bootloader,正式进入S模式;
OpenSBI底层初始化
确定启动的hart id
非启动hart转入_wait_relocate_copy_done等待启动hart初始化完成。
在fw_boot_hart中:fw_jump和fw_payload模式将a0返回为-1;fw_dynamic将根据a2中的dynamic_info去决定a0的值(-1或者为一个固定值,应该是设置的),a1值为2;
/*
 * Firmware entry point: decides whether this hart continues as the boot
 * hart, enters the relocation lottery, or waits for relocation to finish.
 */
_start:
/* Find preferred boot HART id */
/* MOV_3R is defined elsewhere; presumably it copies a0-a2 into s0-s2 so they survive the call */
MOV_3R s0, a0, s1, a1, s2, a2
call fw_boot_hart
/* a6 = value returned by fw_boot_hart in a0 */
add a6, a0, zero
MOV_3R a0, s0, a1, s1, a2, s2
/* A returned value of -1 selects the lottery path */
li a7, -1
beq a6, a7, _try_lottery
/* Jump to relocation wait loop if we are not boot hart */
bne a0, a6, _wait_relocate_copy_done
根据hart id执行重定位(lottery算法确定主hart)
_try_lottery:
/* Jump to relocation wait loop if we don't get relocation lottery */
la a6, _relocate_lottery
li a7, 1
/* Atomically add 1 to _relocate_lottery; a6 receives the value it held before the add */
amoadd.w a6, a7, (a6)
/*
 * Only the first hart to execute the atomic instruction reads back 0 and
 * carries on to perform the relocation; every later hart reads a non-zero
 * value.
 */
bnez a6, _wait_relocate_copy_done /* harts that lost the lottery go to _wait_relocate_copy_done */
/* Save load address */
la t0, _load_start
la t1, _start
REG_S t1, 0(t0) /* store the run-time address of _start into the _load_start variable */
当加载地址和链接地址不相同时,需要进行重定位
/* Relocate if load address != link address
*
* Address layout:
* _link_start <----- t0
*
* _link_end <----- t1
*
* _load_start <----- t2
*
* _load_end <----- t3
*
*/
_relocate:
/* t0, t1, t2 = values stored in the _link_start, _link_end and _load_start variables */
la t0, _link_start
REG_L t0, 0(t0)
la t1, _link_end
REG_L t1, 0(t1)
la t2, _load_start
REG_L t2, 0(t2)
/* t3 = load_start + (link_end - link_start), i.e. the end of the loaded image */
sub t3, t1, t0
add t3, t3, t2
/* Nothing to copy when the image already sits at its link address */
beq t0, t2, _relocate_done
/* t4 = _relocate_done - load_start + link_start: where _relocate_done lives in the link-address copy */
la t4, _relocate_done
sub t4, t4, t2
add t4, t4, t0
/*
 * load_start < link_start: the image has to move to a higher address.
 * NOTE(review): in this listing the branch target is also the next label,
 * so the not-taken path lands there as well and _relocate_copy_to_lower is
 * not reached by any branch shown here - confirm the intended block order.
 */
blt t2, t0, _relocate_copy_to_upper
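地址拷贝:首先要检查地址重叠;实际拷贝时需要区分向高地址还是向低地址拷贝,因为当源区域与目的区域重叠时,从开头开始拷贝与从结尾开始拷贝的结果不同。若_load_start < _link_start,则需要往高地址拷贝数据(_relocate_copy_to_upper);否则往低地址拷贝(_relocate_copy_to_lower)。注意:本文为讲解方便把_relocate_copy_to_upper片段放在了前面;blt不跳转时应当执行的是后文的_relocate_copy_to_lower分支。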
_relocate_copy_to_upper:
/*
 * If load_end (t3) <= link_start (t0) the source and destination do not
 * overlap and the copy loop can start right away. Otherwise the BRANGE
 * checks below are executed first.
 */
ble t3, t0, _relocate_copy_to_upper_loop
/*
 * NOTE(review): BRANGE is defined elsewhere; presumably it jumps to
 * _start_hang when its third operand lies inside the range given by the
 * first two - confirm against the macro definition.
 */
la t2, _relocate_lottery
BRANGE t0, t3, t2, _start_hang
la t2, _boot_status
BRANGE t0, t3, t2, _start_hang
la t2, _relocate
la t5, _relocate_done
BRANGE t0, t3, t2, _start_hang
BRANGE t0, t3, t5, _start_hang
BRANGE t2, t5, t0, _start_hang
重定位的地址拷贝发生在_relocate_copy_to_upper_loop中:为保证重叠区域的数据不被提前覆盖,拷贝从镜像末尾(高地址)开始,逐字向低地址方向进行;拷贝完成后跳转到t4中保存的地址(即_relocate_done的链接地址)。
_relocate_copy_to_upper_loop:
/* Step the source (t3) and destination (t1) cursors down by one pointer-sized word, then copy that word */
add t3, t3, -__SIZEOF_POINTER__
add t1, t1, -__SIZEOF_POINTER__
REG_L t2, 0(t3)
REG_S t2, 0(t1)
/* Keep going while the destination cursor is still above t0 */
blt t0, t1, _relocate_copy_to_upper_loop
/* Continue at the address held in t4 */
jr t4
低地址拷贝同理,方向与高地址相反
_relocate_copy_to_lower:
/*
 * If t1 <= t2 the copy loop starts right away; otherwise the BRANGE checks
 * below are executed first.
 * NOTE(review): BRANGE is defined elsewhere; presumably it jumps to
 * _start_hang when its third operand lies inside the range given by the
 * first two - confirm against the macro definition.
 */
ble t1, t2, _relocate_copy_to_lower_loop
la t3, _relocate_lottery
BRANGE t2, t1, t3, _start_hang
la t3, _boot_status
BRANGE t2, t1, t3, _start_hang
la t3, _relocate
la t5, _relocate_done
BRANGE t2, t1, t3, _start_hang
BRANGE t2, t1, t5, _start_hang
BRANGE t3, t5, t2, _start_hang
_relocate_copy_to_lower_loop:
/* Copy one pointer-sized word from the source cursor (t2) to the destination cursor (t0), then advance both */
REG_L t3, 0(t2)
REG_S t3, 0(t0)
add t0, t0, __SIZEOF_POINTER__
add t2, t2, __SIZEOF_POINTER__
/* Keep going while the destination cursor is below t1 */
blt t0, t1, _relocate_copy_to_lower_loop
/* Continue at the address held in t4 */
jr t4
再来看下非启动hart的动作:若加载地址与链接地址相同,则直接进入_wait_for_boot_hart;否则循环读取_boot_status,直到它不小于BOOT_STATUS_RELOCATE_DONE(即启动hart已完成重定位)。循环中的几个nop指令用于降低总线访问频率;退出循环后,非启动hart会跳转到_wait_for_boot_hart的链接地址,继续等待启动hart完成其他的初始化工作。
_wait_relocate_copy_done:
/* t0 = run-time address of _start, t1 = value stored in the _link_start variable */
la t0, _start
la t1, _link_start
REG_L t1, 0(t1)
/* Load address equals link address: no relocation wait is needed, go wait for the boot hart */
beq t0, t1, _wait_for_boot_hart
la t2, _boot_status
/* t3 = _wait_for_boot_hart - t0 + t1, i.e. its address in the link-address copy */
la t3, _wait_for_boot_hart
sub t3, t3, t0
add t3, t3, t1
1:
/* waiting for relocate copy done (_boot_status == 1) */
li t4, BOOT_STATUS_RELOCATE_DONE
REG_L t5, 0(t2)
/* Reduce the bus traffic so that boot hart may proceed faster */
nop
nop
nop
/* Spin while BOOT_STATUS_RELOCATE_DONE is still greater than the value just read */
bgt t4, t5, 1b
jr t3
重定位结束:设置_boot_status为BOOT_STATUS_RELOCATE_DONE
_relocate_done:
/*
 * Mark relocate copy done
 * Use _boot_status copy relative to the load address
 */
la t0, _boot_status
la t1, _link_start
REG_L t1, 0(t1)
la t2, _load_start
REG_L t2, 0(t2)
/* t0 = _boot_status - link_start + load_start */
sub t0, t0, t1
add t0, t0, t2
/* Relocation finished: store the flag, then fence */
li t1, BOOT_STATUS_RELOCATE_DONE
REG_S t1, 0(t0)
fence rw, rw
/* At this point we are running from link address */
/* Reset all registers for boot HART */
li ra, 0
call _reset_regs
/* Zero-out BSS */
/* s4 = start of BSS, s5 = end of BSS */
la s4, _bss_start
la s5, _bss_end
清除bss段,设置sp指针,8k的临时栈空间,以及scratch空间的构造
写状态寄存器CSR_MTVEC,保存发生异常时处理器需要跳转到的地址
设置sp寄存器,栈指针:栈空间为8k,向下生长
保存:a0~a4寄存器的值,用于fw_save_info,只有fw_dynamic的模式才会用到这些寄存器(因为暂时只有dynamic模式才会将这个结构体传向内核)
_bss_zero:
/* Store zero one pointer-sized word at a time while s4 < s5; the store happens before the check, so the first word is always written */
REG_S zero, (s4)
add s4, s4, __SIZEOF_POINTER__
blt s4, s5, _bss_zero
/* Setup temporary trap handler */
/* mtvec = address of _start_hang */
la s4, _start_hang
csrw CSR_MTVEC, s4
/* Setup temporary stack */
/* sp = _fw_end + (SBI_SCRATCH_SIZE * 2) */
la s4, _fw_end
li s5, (SBI_SCRATCH_SIZE * 2)
add sp, s4, s5
/* Allow main firmware to save info */
/* MOV_5R is defined elsewhere; presumably it parks a0-a4 in s0-s4 around the call and restores them afterwards */
MOV_5R s0, a0, s1, a1, s2, a2, s3, a3, s4, a4
call fw_save_info
MOV_5R a0, s0, a1, s1, a2, s2, a3, s3, a4, s4
这部分代码主要做了两件事:配置设备树和scratch空间
(1)配置设备树(前提是开启FW_FDT_PATH这个宏),platform相关数据结构
- 保存a0~a4寄存器的值
- 平台设备初始化,这部分调用相应platform的初始化
(2)重要的是在这里为每个hart都配置了scratch
- s7:是hart个数
- s8:栈空间大小
- 为每个hart都建立scratch空间:将tp指向_fw_end + s7*s8
#ifdef FW_FDT_PATH
/* Override previous arg1 */
la a1, fw_fdt_bin
#endif
/*
 * Initialize platform
 * Note: The a0 to a4 registers passed to the
 * firmware are parameters to this function.
 */
MOV_5R s0, a0, s1, a1, s2, a2, s3, a3, s4, a4
call fw_platform_init
/* t0 = value returned by fw_platform_init */
add t0, a0, zero
MOV_5R a0, s0, a1, s1, a2, s2, a3, s3, a4, s4
/* a1 is replaced by the returned value */
add a1, t0, zero
/* Preload HART details
 * s7 -> HART Count
 * s8 -> HART Stack Size
 */
la a4, platform
#if __riscv_xlen == 64
lwu s7, SBI_PLATFORM_HART_COUNT_OFFSET(a4)
lwu s8, SBI_PLATFORM_HART_STACK_SIZE_OFFSET(a4)
#else
lw s7, SBI_PLATFORM_HART_COUNT_OFFSET(a4)
lw s8, SBI_PLATFORM_HART_STACK_SIZE_OFFSET(a4)
#endif
/* Setup scratch space for all the HARTs*/
/* tp = _fw_end + (HART count * HART stack size) */
la tp, _fw_end
mul a5, s7, s8
add tp, tp, a5
/* Keep a copy of tp */
add t3, tp, zero
/* Counter */
li t2, 1
/* hartid 0 is mandated by ISA */
li t1, 0
下面看下默认的generic平台下的fw_platform_init函数:a0(arg0)存的是启动hart id,a1(arg1)是上一级启动阶段传入的设备树地址;函数的返回值是设备树的地址,汇编中随后会把它写回a1。这个函数会对platform进行一些简单的初始化工作:
/*
 * The fw_platform_init() function is called very early on the boot HART
 * OpenSBI reference firmwares so that platform specific code get chance
 * to update "platform" instance before it is used.
 *
 * The arguments passed to fw_platform_init() function are boot time state
 * of A0 to A4 register. The "arg0" will be boot HART id and "arg1" will
 * be address of FDT passed by previous booting stage.
 *
 * The return value of fw_platform_init() function is the FDT location. If
 * FDT is unchanged (or FDT is modified in-place) then fw_platform_init()
 * can always return the original FDT location (i.e. 'arg1') unmodified.
 *
 * This function does not return when the FDT has no "/" or "/cpus" node.
 */
unsigned long fw_platform_init(unsigned long arg0, unsigned long arg1,
			       unsigned long arg2, unsigned long arg3,
			       unsigned long arg4)
{
	const char *model;
	void *fdt = (void *)arg1;
	u32 hartid, hart_count = 0;
	int rc, root_offset, cpus_offset, cpu_offset, len;

	root_offset = fdt_path_offset(fdt, "/");
	if (root_offset < 0)
		goto fail;

	fw_platform_lookup_special(fdt, root_offset);

	/* Use the root node's "model" property as the platform name, if present */
	model = fdt_getprop(fdt, root_offset, "model", &len);
	if (model) {
		/*
		 * Copy at most size - 1 bytes and terminate explicitly so the
		 * name is a valid C string even when "model" is longer than
		 * the buffer.
		 */
		sbi_strncpy(platform.name, model, sizeof(platform.name) - 1);
		platform.name[sizeof(platform.name) - 1] = '\0';
	}

	if (generic_plat && generic_plat->features)
		platform.features = generic_plat->features(generic_plat_match);

	cpus_offset = fdt_path_offset(fdt, "/cpus");
	if (cpus_offset < 0)
		goto fail;

	/* Record the hart id of every usable subnode of /cpus, in FDT order */
	fdt_for_each_subnode(cpu_offset, fdt, cpus_offset) {
		rc = fdt_parse_hart_id(fdt, cpu_offset, &hartid);
		if (rc)
			continue;

		/* Skip hart ids at or above SBI_HARTMASK_MAX_BITS */
		if (SBI_HARTMASK_MAX_BITS <= hartid)
			continue;

		generic_hart_index2id[hart_count++] = hartid;
	}

	platform.hart_count = hart_count;

	/* Return original FDT pointer */
	return arg1;

fail:
	/* The FDT is unusable: park this hart forever */
	while (1)
		wfi();
}
下个阶段将进入_scratch_init的阶段,也就是scratch空间的初始化工作,相当于初始化struct sbi_scratch结构体:
/*
 * Per-HART scratch initialisation loop; one iteration per HART index.
 *
 * Register roles on entry (set up by the code preceding this loop):
 * t1 -> index of the HART whose scratch space is being filled (starts at 0)
 * t2 -> index increment (1)
 * t3 -> _fw_end + (HART count * HART stack size)
 * s7 -> HART count
 * s8 -> HART stack size
 */
_scratch_init:
/* tp = t3 - (t1 * s8) - SBI_SCRATCH_SIZE: scratch space for HART index t1 */
add tp, t3, zero
mul a5, s8, t1
sub tp, tp, a5
li a5, SBI_SCRATCH_SIZE
sub tp, tp, a5
/* Initialize scratch space */
/* Store fw_start and fw_size in scratch space */
/* fw_size = (_fw_end + s7 * s8) - _fw_start */
la a4, _fw_start
la a5, _fw_end
mul t0, s7, s8
add a5, a5, t0
sub a5, a5, a4
REG_S a4, SBI_SCRATCH_FW_START_OFFSET(tp)
REG_S a5, SBI_SCRATCH_FW_SIZE_OFFSET(tp)
/* Store next arg1 in scratch space */
MOV_3R s0, a0, s1, a1, s2, a2
call fw_next_arg1
REG_S a0, SBI_SCRATCH_NEXT_ARG1_OFFSET(tp)
MOV_3R a0, s0, a1, s1, a2, s2
/* Store next address in scratch space */
MOV_3R s0, a0, s1, a1, s2, a2
call fw_next_addr
REG_S a0, SBI_SCRATCH_NEXT_ADDR_OFFSET(tp)
MOV_3R a0, s0, a1, s1, a2, s2
/* Store next mode in scratch space */
MOV_3R s0, a0, s1, a1, s2, a2
call fw_next_mode
REG_S a0, SBI_SCRATCH_NEXT_MODE_OFFSET(tp)
MOV_3R a0, s0, a1, s1, a2, s2
/* Store warm_boot address in scratch space */
la a4, _start_warm
REG_S a4, SBI_SCRATCH_WARMBOOT_ADDR_OFFSET(tp)
/* Store platform address in scratch space */
la a4, platform
REG_S a4, SBI_SCRATCH_PLATFORM_ADDR_OFFSET(tp)
/* Store hartid-to-scratch function address in scratch space */
la a4, _hartid_to_scratch
REG_S a4, SBI_SCRATCH_HARTID_TO_SCRATCH_OFFSET(tp)
/* Store trap-exit function address in scratch space */
la a4, _trap_exit
REG_S a4, SBI_SCRATCH_TRAP_EXIT_OFFSET(tp)
/* Clear tmp0 in scratch space */
REG_S zero, SBI_SCRATCH_TMP0_OFFSET(tp)
/* Store firmware options in scratch space */
MOV_3R s0, a0, s1, a1, s2, a2
#ifdef FW_OPTIONS
li a0, FW_OPTIONS
#else
call fw_options
#endif
REG_S a0, SBI_SCRATCH_OPTIONS_OFFSET(tp)
MOV_3R a0, s0, a1, s1, a2, s2
/* Move to next scratch space */
/* Advance the HART index and loop while it is below the HART count */
add t1, t1, t2
blt t1, s7, _scratch_init
可以看到上面的scratch_init汇编就是在对scratch这个结构体进行初始化配置:
/**
 * Representation of per-HART scratch space.
 *
 * Every member is an unsigned long. NOTE(review): the assembly fills this
 * area through SBI_SCRATCH_*_OFFSET constants, so member order presumably
 * has to stay in sync with those offsets - confirm before reordering.
 */
struct sbi_scratch {
/** Start (or base) address of firmware linked to OpenSBI library */
unsigned long fw_start;
/** Size (in bytes) of firmware linked to OpenSBI library */
unsigned long fw_size;
/** Arg1 (or 'a1' register) of next booting stage for this HART */
unsigned long next_arg1;
/** Address of next booting stage for this HART */
unsigned long next_addr;
/** Privilege mode of next booting stage for this HART */
unsigned long next_mode;
/** Warm boot entry point address for this HART */
unsigned long warmboot_addr;
/** Address of sbi_platform */
unsigned long platform_addr;
/** Address of HART ID to sbi_scratch conversion function */
unsigned long hartid_to_scratch;
/** Address of trap exit function */
unsigned long trap_exit;
/** Temporary storage */
unsigned long tmp0;
/** Options for OpenSBI library */
unsigned long options;
};
实现设备树的重定位
将a1指向的设备树(由上一级启动阶段传入)拷贝到t1指向的目的地址,即fw_next_arg1()返回的、下一级bootloader存放设备树的地址FW_XXX_FDT_ADDR
/*
 * Relocate Flattened Device Tree (FDT)
 * source FDT address = previous arg1
 * destination FDT address = next arg1
 *
 * Note: We will preserve a0 and a1 passed by
 * previous booting stage.
 */
/* a1 == 0: there is no source FDT, skip the relocation */
beqz a1, _fdt_reloc_done
/* Mask values in a3 and a4 */
/* a3 = ~(__SIZEOF_POINTER__ - 1), used to round addresses down; a4 = 0xff, used to pick out one byte */
li a3, ~(__SIZEOF_POINTER__ - 1)
li a4, 0xff
/* t1 = destination FDT start address */
MOV_3R s0, a0, s1, a1, s2, a2
call fw_next_arg1
add t1, a0, zero
MOV_3R a0, s0, a1, s1, a2, s2
/* Skip the copy when the destination is 0 or is the same as the source */
beqz t1, _fdt_reloc_done
beq t1, a1, _fdt_reloc_done
and t1, t1, a3
/* t0 = source FDT start address */
add t0, a1, zero
and t0, t0, a3
/* t2 = source FDT size in big-endian */
/* Read as a 32-bit word at byte offset 4 from the source start */
#if __riscv_xlen == 64
lwu t2, 4(t0)
#else
lw t2, 4(t0)
#endif
/* The four byte extractions below reverse the byte order of t2 */
/* t3 = bit[15:8] of FDT size */
add t3, t2, zero
srli t3, t3, 16
and t3, t3, a4
slli t3, t3, 8
/* t4 = bit[23:16] of FDT size */
add t4, t2, zero
srli t4, t4, 8
and t4, t4, a4
slli t4, t4, 16
/* t5 = bit[31:24] of FDT size */
add t5, t2, zero
and t5, t5, a4
slli t5, t5, 24
/* t2 = bit[7:0] of FDT size */
srli t2, t2, 24
and t2, t2, a4
/* t2 = FDT size in little-endian */
or t2, t2, t3
or t2, t2, t4
or t2, t2, t5
/* t2 = destination FDT end address */
add t2, t1, t2
/* FDT copy loop */
/* Nothing to copy when the end address is not above the start address */
ble t2, t1, _fdt_reloc_done
_fdt_reloc_again:
/* Copy one pointer-sized word per iteration while t1 < t2 */
REG_L t3, 0(t0)
REG_S t3, 0(t1)
add t0, t0, __SIZEOF_POINTER__
add t1, t1, __SIZEOF_POINTER__
blt t1, t2, _fdt_reloc_again
_fdt_reloc_done:
/* mark boot hart done */
li t0, BOOT_STATUS_BOOT_HART_DONE
la t1, _boot_status
REG_S t0, 0(t1)
fence rw, rw
j _start_warm
其他非启动hart将等待_boot_status=BOOT_STATUS_BOOT_HART_DONE
/* waiting for boot hart to be done (_boot_status == 2) */
_wait_for_boot_hart:
/* Re-read _boot_status on every iteration and compare it with BOOT_STATUS_BOOT_HART_DONE */
li t0, BOOT_STATUS_BOOT_HART_DONE
la t1, _boot_status
REG_L t1, 0(t1)
/* Reduce the bus traffic so that boot hart may proceed faster */
nop
nop
nop
/* Spin until the value read equals BOOT_STATUS_BOOT_HART_DONE */
bne t0, t1, _wait_for_boot_hart
所有的核都将进入_start_warm阶段
这个阶段的主要作用为:reset通用寄存器;关闭、清理中断;设置mscratch和sp寄存器;设置mtvec trap处理寄存器与_trap_handler函数。
/*
 * Warm-boot path: resets registers, masks interrupts, locates this HART's
 * scratch space, installs the stack and trap handler, then calls sbi_init
 * with the scratch pointer in a0.
 */
_start_warm:
/* Reset all registers for non-boot HARTs */
li ra, 0
call _reset_regs
/* Disable and clear all interrupts */
csrw CSR_MIE, zero
csrw CSR_MIP, zero
/* Find HART count and HART stack size */
/* s7 = HART count, s8 = HART stack size, s9 = pointer to the index-to-id table (may be 0) */
la a4, platform
#if __riscv_xlen == 64
lwu s7, SBI_PLATFORM_HART_COUNT_OFFSET(a4)
lwu s8, SBI_PLATFORM_HART_STACK_SIZE_OFFSET(a4)
#else
lw s7, SBI_PLATFORM_HART_COUNT_OFFSET(a4)
lw s8, SBI_PLATFORM_HART_STACK_SIZE_OFFSET(a4)
#endif
REG_L s9, SBI_PLATFORM_HART_INDEX2ID_OFFSET(a4)
/* Find HART id */
csrr s6, CSR_MHARTID
/* Find HART index */
/* Without a table the HART id itself is used as the index */
beqz s9, 3f
li a4, 0
1:
/* Scan the table of 32-bit entries for this HART's id; a4 counts entries visited */
#if __riscv_xlen == 64
lwu a5, (s9)
#else
lw a5, (s9)
#endif
beq a5, s6, 2f
add s9, s9, 4
add a4, a4, 1
blt a4, s7, 1b
/* Not found: use -1 as the index so the range check below rejects it */
li a4, -1
2: add s6, a4, zero
/*
 * Hang when the index is out of range. The comparison is unsigned so that
 * the -1 "not found" value (all ones) is treated as larger than any HART
 * count; a signed compare would let it through.
 */
3: bgeu s6, s7, _start_hang
/* Find the scratch space based on HART index */
/* tp = _fw_end + (s7 * s8) - (s8 * s6) - SBI_SCRATCH_SIZE */
la tp, _fw_end
mul a5, s7, s8
add tp, tp, a5
mul a5, s8, s6
sub tp, tp, a5
li a5, SBI_SCRATCH_SIZE
sub tp, tp, a5
/* update the mscratch */
csrw CSR_MSCRATCH, tp
/* Setup stack */
/* sp starts at the scratch address */
add sp, tp, zero
/* Setup trap handler */
la a4, _trap_handler
#if __riscv_xlen == 32
/* Use the _rv32_hyp handler when MISA bit ('H' - 'A') is set */
csrr a5, CSR_MISA
srli a5, a5, ('H' - 'A')
andi a5, a5, 0x1
beq a5, zero, _skip_trap_handler_rv32_hyp
la a4, _trap_handler_rv32_hyp
_skip_trap_handler_rv32_hyp:
#endif
csrw CSR_MTVEC, a4
#if __riscv_xlen == 32
/* Override trap exit for H-extension */
csrr a5, CSR_MISA
srli a5, a5, ('H' - 'A')
andi a5, a5, 0x1
beq a5, zero, _skip_trap_exit_rv32_hyp
la a4, _trap_exit_rv32_hyp
csrr a5, CSR_MSCRATCH
REG_S a4, SBI_SCRATCH_TRAP_EXIT_OFFSET(a5)
_skip_trap_exit_rv32_hyp:
#endif
/* Initialize SBI runtime */
/* a0 = scratch pointer read back from mscratch */
csrr a0, CSR_MSCRATCH
call sbi_init
/* We don't expect to reach here hence just hang */
j _start_hang
异常处理相关内容
这里需要特殊强调的是异常处理构建的相关内容:
这里会将a4寄存器中的值写入CSR_MTVEC这个状态寄存器,也就是异常处理程序的入口
/* Setup trap handler */
/* a4 = handler address that will be written to mtvec */
la a4, _trap_handler
#if __riscv_xlen == 32
/* Use the _rv32_hyp handler when MISA bit ('H' - 'A') is set */
csrr a5, CSR_MISA
srli a5, a5, ('H' - 'A')
andi a5, a5, 0x1
/* NOTE(review): the _skip_trap_handler_rv32_hyp label targeted here is not part of this excerpt */
beq a5, zero, _skip_trap_handler_rv32_hyp
la a4, _trap_handler_rv32_hyp
#endif
csrw CSR_MTVEC, a4
/*
 * Trap entry point, placed in the .entry section and exported as a global
 * symbol. It runs the save macros, calls the C routine through
 * TRAP_CALL_C_ROUTINE, runs the matching restore macros in reverse order
 * and returns with mret. The macros themselves are defined elsewhere.
 */
.section .entry, "ax", %progbits
.align 3
.globl _trap_handler
_trap_handler:
TRAP_SAVE_AND_SETUP_SP_T0
TRAP_SAVE_MEPC_MSTATUS 0
TRAP_SAVE_GENERAL_REGS_EXCEPT_SP_T0
TRAP_CALL_C_ROUTINE
TRAP_RESTORE_GENERAL_REGS_EXCEPT_SP_T0
TRAP_RESTORE_MEPC_MSTATUS 0
TRAP_RESTORE_SP_T0
mret
建立exception stack空间:若异常来自M模式,则从SP指针开始构建;若异常不是来自M模式,则需要从TP指针开始构建,TP的值取自MSCRATCH(这个寄存器会在非M模式下记录M模式下的栈帧地址)
/*
 * Trap entry helper: selects the exception stack, saves the original SP and
 * T0 on it, and leaves SP at (exception stack - SBI_TRAP_REGS_SIZE).
 * TP and MSCRATCH are swapped at the start and swapped back at the end.
 */
.macro TRAP_SAVE_AND_SETUP_SP_T0
/* Swap TP and MSCRATCH */
csrrw tp, CSR_MSCRATCH, tp
/* Save T0 in scratch space */
REG_S t0, SBI_SCRATCH_TMP0_OFFSET(tp)
/*
 * Set T0 to appropriate exception stack
 *
 * Came_From_M_Mode = ((MSTATUS.MPP < PRV_M) ? 1 : 0) - 1;
 * Exception_Stack = TP ^ (Came_From_M_Mode & (SP ^ TP))
 *
 * Came_From_M_Mode = 0 ==> Exception_Stack = TP
 * Came_From_M_Mode = -1 ==> Exception_Stack = SP
 */
/* t0 = ((MSTATUS.MPP < PRV_M) ? 1 : 0) - 1 */
csrr t0, CSR_MSTATUS
srl t0, t0, MSTATUS_MPP_SHIFT
and t0, t0, PRV_M
slti t0, t0, PRV_M
add t0, t0, -1
/* Branch-free select: sp is XORed with tp temporarily and restored two instructions later */
xor sp, sp, tp
and t0, t0, sp
xor sp, sp, tp
xor t0, tp, t0
/* Save original SP on exception stack */
REG_S sp, (SBI_TRAP_REGS_OFFSET(sp) - SBI_TRAP_REGS_SIZE)(t0)
/* Set SP to exception stack and make room for trap registers */
add sp, t0, -(SBI_TRAP_REGS_SIZE)
/* Restore T0 from scratch space */
REG_L t0, SBI_SCRATCH_TMP0_OFFSET(tp)
/* Save T0 on stack */
REG_S t0, SBI_TRAP_REGS_OFFSET(t0)(sp)
/* Swap TP and MSCRATCH */
csrrw tp, CSR_MSCRATCH, tp
.endm
TRAP_CALL_C_ROUTINE则会调用到C阶段:
/* Hands the trap over to C: passes the current SP as the first argument (a0) to sbi_trap_handler */
.macro TRAP_CALL_C_ROUTINE
/* Call C routine */
add a0, sp, zero
call sbi_trap_handler
.endm
参考:
https://zhuanlan.zhihu.com/p/630062643
https://www.cnblogs.com/harrypotterjackson/p/17558399.html