/* SPDX-License-Identifier: GPL-2.0-only */
/*
 * relocate_kernel.S - put the kernel image in place to boot
 * Copyright (C) 2002-2005 Eric Biederman <ebiederm@xmission.com>
 */

#include <linux/linkage.h>
#include <linux/stringify.h>
#include <asm/alternative.h>
#include <asm/page_types.h>
#include <asm/kexec.h>
#include <asm/processor-flags.h>
#include <asm/pgtable_types.h>
#include <asm/nospec-branch.h>
#include <asm/unwind_hints.h>
#include <asm/asm-offsets.h>

/*
 * Must be relocatable PIC code callable as a C function, in particular
 * there must be a plain RET and not jump to return thunk.
 */

#define PTR(x) (x << 3)
#define PAGE_ATTR (_PAGE_PRESENT | _PAGE_RW | _PAGE_ACCESSED | _PAGE_DIRTY)

/*
 * The .text..relocate_kernel and .data..relocate_kernel sections are copied
 * into the control page, and the remainder of the page is used as the stack.
 */

	.section .data..relocate_kernel,"a";
/* Minimal CPU state */
SYM_DATA_LOCAL(saved_rsp, .quad 0)
SYM_DATA_LOCAL(saved_cr0, .quad 0)
SYM_DATA_LOCAL(saved_cr3, .quad 0)
SYM_DATA_LOCAL(saved_cr4, .quad 0)
/* other data */
SYM_DATA(kexec_va_control_page, .quad 0)
SYM_DATA(kexec_pa_table_page, .quad 0)
SYM_DATA(kexec_pa_swap_page, .quad 0)
SYM_DATA_LOCAL(pa_backup_pages_map, .quad 0)
SYM_DATA(kexec_debug_8250_mmio32, .quad 0)
SYM_DATA(kexec_debug_8250_port, .word 0)

	.balign 16
SYM_DATA_START_LOCAL(kexec_debug_gdt)
	.word	kexec_debug_gdt_end - kexec_debug_gdt - 1
	.long	0
	.word	0
	.quad	0x00cf9a000000ffff	/* __KERNEL32_CS */
	.quad	0x00af9a000000ffff	/* __KERNEL_CS */
	.quad	0x00cf92000000ffff	/* __KERNEL_DS */
SYM_DATA_END_LABEL(kexec_debug_gdt, SYM_L_LOCAL, kexec_debug_gdt_end)

	.balign 8
SYM_DATA_START(kexec_debug_idt)
	.skip 0x100, 0x00
SYM_DATA_END(kexec_debug_idt)

	.section .text..relocate_kernel,"ax";
	.code64
SYM_CODE_START_NOALIGN(relocate_kernel)
	UNWIND_HINT_END_OF_STACK
	ANNOTATE_NOENDBR
	/*
	 * %rdi indirection_page
	 * %rsi pa_control_page
	 * %rdx start address
	 * %rcx preserve_context
	 * %r8  host_mem_enc_active
	 */

	/* Save the CPU context, used for jumping back */
	pushq	%rbx
	pushq	%rbp
	pushq	%r12
	pushq	%r13
	pushq	%r14
	pushq	%r15
	pushf

	/* Invalidate GDT/IDT, zero out flags */
	pushq	$0
	pushq	$0

	lidt	(%rsp)
	lgdt	(%rsp)
	addq	$8, %rsp
	popfq

	/* Switch to the identity mapped page tables */
	movq	%cr3, %rax
	movq	kexec_pa_table_page(%rip), %r9
	movq	%r9, %cr3

	/* Leave CR4 in %r13 to enable the right paging mode later. */
	movq	%cr4, %r13

	/* Disable global pages immediately to ensure this mapping is RWX */
	movq	%r13, %r12
	andq	$~(X86_CR4_PGE), %r12
	movq	%r12, %cr4

	/* Save %rsp and CRs. */
	movq	%r13, saved_cr4(%rip)
	movq	%rsp, saved_rsp(%rip)
	movq	%rax, saved_cr3(%rip)
	movq	%cr0, %rax
	movq	%rax, saved_cr0(%rip)

	/* save indirection list for jumping back */
	movq	%rdi, pa_backup_pages_map(%rip)

	/* Save the preserve_context to %r11 as swap_pages clobbers %rcx. */
	movq	%rcx, %r11

	/* setup a new stack at the end of the physical control page */
	lea	PAGE_SIZE(%rsi), %rsp

	/* jump to identity mapped page */
0:	addq	$identity_mapped - 0b, %rsi
	subq	$__relocate_kernel_start - 0b, %rsi

	ANNOTATE_RETPOLINE_SAFE
	jmp	*%rsi
SYM_CODE_END(relocate_kernel)
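
/*
 * Worked example of the position-independent jump above, with
 * illustrative numbers (not from the source): if the control page is at
 * physical address 0x100000 and identity_mapped is linked 0x100 bytes
 * past __relocate_kernel_start, then starting from %rsi == 0x100000:
 *
 *	%rsi += identity_mapped - 0b
 *	%rsi -= __relocate_kernel_start - 0b
 *
 * leaves %rsi == 0x100100, i.e. the copy of identity_mapped inside the
 * control page, regardless of where the code was linked to run.
 */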

SYM_CODE_START_LOCAL_NOALIGN(identity_mapped)
	UNWIND_HINT_END_OF_STACK
	/*
	 * %rdi	indirection page
	 * %rdx	start address
	 * %r8	host_mem_enc_active
	 * %r9	page table page
	 * %r11	preserve_context
	 * %r13	original CR4 when relocate_kernel() was invoked
	 */

	/* store the start address on the stack */
	pushq	%rdx

	/* Create a GDTR (16 bits limit, 64 bits addr) on stack */
	leaq	kexec_debug_gdt(%rip), %rax
	pushq	%rax
	pushw	(%rax)

	/* Load the GDT, put the stack back */
	lgdt	(%rsp)
	addq	$10, %rsp

	/* Test that we can load segments */
	movq	%ds, %rax
	movq	%rax, %ds

	/* Now an IDTR on the stack to load the IDT the kernel created */
	leaq	kexec_debug_idt(%rip), %rsi
	pushq	%rsi
	pushw	$0xff
	lidt	(%rsp)
	addq	$10, %rsp

	//int3

	/*
	 * Clear X86_CR4_CET (if it was set) such that we can clear CR0_WP
	 * below.
	 */
	movq	%cr4, %rax
	andq	$~(X86_CR4_CET), %rax
	movq	%rax, %cr4

	/*
	 * Set cr0 to a known state:
	 *  - Paging enabled
	 *  - Alignment check disabled
	 *  - Write protect disabled
	 *  - No task switch
	 *  - Don't do FP software emulation.
	 *  - Protected mode enabled
	 */
	movq	%cr0, %rax
	andq	$~(X86_CR0_AM | X86_CR0_WP | X86_CR0_TS | X86_CR0_EM), %rax
	orl	$(X86_CR0_PG | X86_CR0_PE), %eax
	movq	%rax, %cr0

	/*
	 * Set cr4 to a known state:
	 *  - physical address extension enabled
	 *  - 5-level paging, if it was enabled before
	 *  - Machine check exception on TDX guest, if it was enabled before.
	 *    Clearing MCE might not be allowed in TDX guests, depending on setup.
	 *
	 * Use R13 that contains the original CR4 value, read in relocate_kernel().
	 * PAE is always set in the original CR4.
	 */
	andl	$(X86_CR4_PAE | X86_CR4_LA57), %r13d
	ALTERNATIVE "", __stringify(orl $X86_CR4_MCE, %r13d), X86_FEATURE_TDX_GUEST
	movq	%r13, %cr4

	/* Flush the TLB (needed?) */
	movq	%r9, %cr3

	/*
	 * If SME is active, there could be old encrypted cache line
	 * entries that will conflict with the now unencrypted memory
	 * used by kexec. Flush the caches before copying the kernel.
	 */
	testq	%r8, %r8
	jz	.Lsme_off
	wbinvd
.Lsme_off:
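	/*
	 * swap_pages walks the kexec indirection list handed over in %rdi.
	 * Each 8-byte entry is a page-aligned physical address with flag
	 * bits in the low nybble (the IND_* flags from <linux/kexec.h>);
	 * a sketch with illustrative addresses:
	 *	0x0000000012345000 | 0x1   IND_DESTINATION: set copy target
	 *	0x0000000023456000 | 0x2   IND_INDIRECTION: next entry page
	 *	0x0000000034567000 | 0x8   IND_SOURCE: copy page to target
	 *	0x0000000000000004         IND_DONE: end of list
	 */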
	call	swap_pages

	/*
	 * To be certain of avoiding problems with self-modifying code
	 * I need to execute a serializing instruction here.
	 * So I flush the TLB by reloading %cr3 here, it's handy,
	 * and not processor dependent.
	 */
	movq	%cr3, %rax
	movq	%rax, %cr3

	testq	%r11, %r11	/* preserve_context */
	jnz	.Lrelocate

	/*
	 * set all of the registers to known values
	 * leave %rsp alone
	 */
	xorl	%eax, %eax
	xorl	%ebx, %ebx
	xorl	%ecx, %ecx
	xorl	%edx, %edx
	xorl	%esi, %esi
	xorl	%edi, %edi
	xorl	%ebp, %ebp
	xorl	%r8d, %r8d
	xorl	%r9d, %r9d
	xorl	%r10d, %r10d
	xorl	%r11d, %r11d
	xorl	%r12d, %r12d
	xorl	%r13d, %r13d
	xorl	%r14d, %r14d
	xorl	%r15d, %r15d

	ANNOTATE_UNRET_SAFE
	ret
	int3

.Lrelocate:
	popq	%rdx

	/* Use the swap page for the callee's stack */
	movq	kexec_pa_swap_page(%rip), %r10
	leaq	PAGE_SIZE(%r10), %rsp

	/* push the existing entry point onto the callee's stack */
	pushq	%rdx

	ANNOTATE_RETPOLINE_SAFE
	call	*%rdx

	/* get the re-entry point of the peer system */
	popq	%rbp
	movq	kexec_pa_swap_page(%rip), %r10
	movq	pa_backup_pages_map(%rip), %rdi
	movq	kexec_pa_table_page(%rip), %rax
	movq	%rax, %cr3

	/* Find start (and end) of this physical mapping of control page */
	leaq	(%rip), %r8
	ANNOTATE_NOENDBR
	andq	$PAGE_MASK, %r8
	lea	PAGE_SIZE(%r8), %rsp
	movl	$1, %r11d	/* Ensure preserve_context flag is set */
	call	swap_pages
	movq	kexec_va_control_page(%rip), %rax
0:	addq	$virtual_mapped - 0b, %rax
	subq	$__relocate_kernel_start - 0b, %rax
	pushq	%rax
	ANNOTATE_UNRET_SAFE
	ret
	int3
SYM_CODE_END(identity_mapped)

SYM_CODE_START_LOCAL_NOALIGN(virtual_mapped)
	UNWIND_HINT_END_OF_STACK
	ANNOTATE_NOENDBR // RET target, above
	movq	saved_rsp(%rip), %rsp
	movq	saved_cr4(%rip), %rax
	movq	%rax, %cr4
	movq	saved_cr3(%rip), %rax
	movq	saved_cr0(%rip), %r8
	movq	%rax, %cr3
	movq	%r8, %cr0

#ifdef CONFIG_KEXEC_JUMP
	/* Saved in save_processor_state. */
	movq	$saved_context, %rax
	lgdt	saved_context_gdt_desc(%rax)
#endif

	/* relocate_kernel() returns the re-entry point for next time */
	movq	%rbp, %rax

	popf
	popq	%r15
	popq	%r14
	popq	%r13
	popq	%r12
	popq	%rbp
	popq	%rbx
	ANNOTATE_UNRET_SAFE
	ret
	int3
SYM_CODE_END(virtual_mapped)

	/* Do the copies */
SYM_CODE_START_LOCAL_NOALIGN(swap_pages)
	UNWIND_HINT_END_OF_STACK
	/*
	 * %rdi indirection page
	 * %r11 preserve_context
	 */
	movq	%rdi, %rcx	/* Put the indirection_page in %rcx */
	xorl	%edi, %edi
	xorl	%esi, %esi
	jmp	.Lstart		/* Should start with an indirection record */

.Lloop:	/* top, read another word for the indirection page */
	movq	(%rbx), %rcx
	addq	$8, %rbx
.Lstart:
	testb	$0x1, %cl	/* is it a destination page? */
	jz	.Lnotdest
	movq	%rcx, %rdi
	andq	$0xfffffffffffff000, %rdi
	jmp	.Lloop
.Lnotdest:
	testb	$0x2, %cl	/* is it an indirection page? */
	jz	.Lnotind
	movq	%rcx, %rbx
	andq	$0xfffffffffffff000, %rbx
	jmp	.Lloop
.Lnotind:
	testb	$0x4, %cl	/* is it the done indicator? */
	jz	.Lnotdone
	jmp	.Ldone
.Lnotdone:
	testb	$0x8, %cl	/* is it the source indicator? */
	jz	.Lloop		/* Ignore it otherwise */
	movq	%rcx, %rsi	/* For every source page do a copy */
	andq	$0xfffffffffffff000, %rsi

	movq	%rdi, %rdx	/* Save destination page to %rdx */
	movq	%rsi, %rax	/* Save source page to %rax */

	testq	%r11, %r11	/* Only actually swap for ::preserve_context */
	jz	.Lnoswap

	/* copy source page to swap page */
	movq	kexec_pa_swap_page(%rip), %rdi
	movl	$512, %ecx
	rep movsq

	/* copy destination page to source page */
	movq	%rax, %rdi
	movq	%rdx, %rsi
	movl	$512, %ecx
	rep movsq

	/* copy swap page to destination page */
	movq	%rdx, %rdi
	movq	kexec_pa_swap_page(%rip), %rsi
.Lnoswap:
	movl	$512, %ecx
	rep movsq

	lea	PAGE_SIZE(%rax), %rsi
	jmp	.Lloop
.Ldone:
	ANNOTATE_UNRET_SAFE
	ret
	int3
SYM_CODE_END(swap_pages)
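
/*
 * C-equivalent sketch of the preserve_context three-way copy above
 * (illustrative; 'swap' stands for the kexec_pa_swap_page buffer):
 *
 *	memcpy(swap, src, PAGE_SIZE);	// stash the new kernel's page
 *	memcpy(src, dst, PAGE_SIZE);	// old contents into the source slot
 *	memcpy(dst, swap, PAGE_SIZE);	// new contents into place
 *
 * Source and destination pages end up exchanged rather than overwritten,
 * so a second pass over the same list can swap them back on return.
 */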

/*
 * Generic 'print character' routine
 *  - %al: Character to be printed (may clobber %rax)
 *  - %rdx: MMIO address or port.
 */
#define XMTRDY		0x20

#define TXR		0	/* Transmit register (WRITE) */
#define LSR		5	/* Line Status */

SYM_CODE_START_LOCAL_NOALIGN(pr_char_8250)
	UNWIND_HINT_FUNC
	ANNOTATE_NOENDBR
	addw	$LSR, %dx
	xchg	%al, %ah
.Lxmtrdy_loop:
	inb	%dx, %al
	testb	$XMTRDY, %al
	jnz	.Lready
	pause
	jmp	.Lxmtrdy_loop
.Lready:
	subw	$LSR, %dx
	xchg	%al, %ah
	outb	%al, %dx
pr_char_null:
	ANNOTATE_NOENDBR

	ANNOTATE_UNRET_SAFE
	ret
SYM_CODE_END(pr_char_8250)

SYM_CODE_START_LOCAL_NOALIGN(pr_char_8250_mmio32)
	UNWIND_HINT_FUNC
	ANNOTATE_NOENDBR
.Lxmtrdy_loop_mmio:
	movb	(LSR*4)(%rdx), %ah
	testb	$XMTRDY, %ah
	jnz	.Lready_mmio
	pause
	jmp	.Lxmtrdy_loop_mmio
.Lready_mmio:
	movb	%al, (%rdx)
	ANNOTATE_UNRET_SAFE
	ret
SYM_CODE_END(pr_char_8250_mmio32)

/*
 * Load pr_char function pointer into %rsi and load %rdx with whatever
 * that function wants to see there (typically port/MMIO address).
 */
.macro pr_setup
	leaq	pr_char_8250(%rip), %rsi
	movw	kexec_debug_8250_port(%rip), %dx
	testw	%dx, %dx
	jnz	1f

	leaq	pr_char_8250_mmio32(%rip), %rsi
	movq	kexec_debug_8250_mmio32(%rip), %rdx
	testq	%rdx, %rdx
	jnz	1f

	leaq	pr_char_null(%rip), %rsi
1:
.endm

/* Print the nybble in %bl, clobber %rax */
SYM_CODE_START_LOCAL_NOALIGN(pr_nybble)
	UNWIND_HINT_FUNC
	movb	%bl, %al
	nop
	andb	$0x0f, %al
	addb	$0x30, %al
	cmpb	$0x3a, %al
	jb	1f
	addb	$('a' - '0' - 10), %al
	ANNOTATE_RETPOLINE_SAFE
1:	jmp	*%rsi
SYM_CODE_END(pr_nybble)

SYM_CODE_START_LOCAL_NOALIGN(pr_qword)
	UNWIND_HINT_FUNC
	movq	$16, %rcx
1:	rolq	$4, %rbx
	call	pr_nybble
	loop	1b
	movb	$'\n', %al
	ANNOTATE_RETPOLINE_SAFE
	jmp	*%rsi
SYM_CODE_END(pr_qword)

.macro print_reg a, b, c, d, r
	movb	$\a, %al
	ANNOTATE_RETPOLINE_SAFE
	call	*%rsi
	movb	$\b, %al
	ANNOTATE_RETPOLINE_SAFE
	call	*%rsi
	movb	$\c, %al
	ANNOTATE_RETPOLINE_SAFE
	call	*%rsi
	movb	$\d, %al
	ANNOTATE_RETPOLINE_SAFE
	call	*%rsi
	movq	\r, %rbx
	call	pr_qword
.endm

SYM_CODE_START_NOALIGN(kexec_debug_exc_vectors)
	/* Each of these is 6 bytes. */
.macro vec_err exc
	UNWIND_HINT_ENTRY
	. = kexec_debug_exc_vectors + (\exc * KEXEC_DEBUG_EXC_HANDLER_SIZE)
	nop
	nop
	pushq	$\exc
	jmp	exc_handler
.endm

.macro vec_noerr exc
	UNWIND_HINT_ENTRY
	. = kexec_debug_exc_vectors + (\exc * KEXEC_DEBUG_EXC_HANDLER_SIZE)
	pushq	$0
	pushq	$\exc
	jmp	exc_handler
.endm

	ANNOTATE_NOENDBR
	vec_noerr 0 // #DE
	vec_noerr 1 // #DB
	vec_noerr 2 // #NMI
	vec_noerr 3 // #BP
	vec_noerr 4 // #OF
	vec_noerr 5 // #BR
	vec_noerr 6 // #UD
	vec_noerr 7 // #NM
	vec_err 8 // #DF
	vec_noerr 9
	vec_err 10 // #TS
	vec_err 11 // #NP
	vec_err 12 // #SS
	vec_err 13 // #GP
	vec_err 14 // #PF
	vec_noerr 15
SYM_CODE_END(kexec_debug_exc_vectors)
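
/*
 * A note on the stub layout above (informational sketch): a pushq of an
 * 8-bit immediate and a short jmp are 2 bytes each, so a vec_noerr stub
 * is push + push + jmp = 6 bytes, and vec_err pads with two 1-byte nops
 * in place of the error-code push the CPU performs itself. Every vector
 * therefore lands on a KEXEC_DEBUG_EXC_HANDLER_SIZE boundary, which is
 * what the '. =' directives in the macros assert.
 */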
SYM_CODE_START_LOCAL_NOALIGN(exc_handler)
	/* No need for RET mitigations during kexec */
	VALIDATE_UNRET_END

	pushq	%rax
	pushq	%rbx
	pushq	%rcx
	pushq	%rdx
	pushq	%rsi

	/* Stack frame */
#define EXC_SS		0x58 /* Architectural... */
#define EXC_RSP		0x50
#define EXC_EFLAGS	0x48
#define EXC_CS		0x40
#define EXC_RIP		0x38
#define EXC_ERRORCODE	0x30 /* Either architectural or zero pushed by handler */
#define EXC_EXCEPTION	0x28 /* Pushed by handler entry point */
#define EXC_RAX		0x20 /* Pushed just above in exc_handler */
#define EXC_RBX		0x18
#define EXC_RCX		0x10
#define EXC_RDX		0x08
#define EXC_RSI		0x00

	/* Set up %rdx/%rsi for debug output */
	pr_setup

	/* rip and exception info */
	print_reg 'E', 'x', 'c', ':', EXC_EXCEPTION(%rsp)
	print_reg 'E', 'r', 'r', ':', EXC_ERRORCODE(%rsp)
	print_reg 'r', 'i', 'p', ':', EXC_RIP(%rsp)
	print_reg 'r', 's', 'p', ':', EXC_RSP(%rsp)

	/* We spilled these to the stack */
	print_reg 'r', 'a', 'x', ':', EXC_RAX(%rsp)
	print_reg 'r', 'b', 'x', ':', EXC_RBX(%rsp)
	print_reg 'r', 'c', 'x', ':', EXC_RCX(%rsp)
	print_reg 'r', 'd', 'x', ':', EXC_RDX(%rsp)
	print_reg 'r', 's', 'i', ':', EXC_RSI(%rsp)

	/* Other registers untouched */
	print_reg 'r', 'd', 'i', ':', %rdi
	print_reg 'r', '8', ' ', ':', %r8
	print_reg 'r', '9', ' ', ':', %r9
	print_reg 'r', '1', '0', ':', %r10
	print_reg 'r', '1', '1', ':', %r11
	print_reg 'r', '1', '2', ':', %r12
	print_reg 'r', '1', '3', ':', %r13
	print_reg 'r', '1', '4', ':', %r14
	print_reg 'r', '1', '5', ':', %r15
	print_reg 'c', 'r', '2', ':', %cr2

	/* Only return from INT3 */
	cmpq	$3, EXC_EXCEPTION(%rsp)
	jne	.Ldie

	popq	%rsi
	popq	%rdx
	popq	%rcx
	popq	%rbx
	popq	%rax

	/* Drop the vector number and error code, then resume */
	addq	$16, %rsp
	iretq

.Ldie:
	hlt
	jmp	.Ldie

SYM_CODE_END(exc_handler)
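
/*
 * Example of the debug console output from exc_handler for a #GP
 * (vector 0xd); the values below are illustrative only. Each print_reg
 * emits a four-character tag, then pr_qword prints 16 hex nybbles and
 * a newline:
 *	Exc:000000000000000d
 *	Err:0000000000000000
 *	rip:000000000009e0c4
 *	...
 */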