From da64a2359092ceec4f9dea5b329d0aef20104217 Mon Sep 17 00:00:00 2001 From: Tiezhu Yang Date: Sat, 8 Mar 2025 13:50:45 +0800 Subject: [PATCH 1/8] LoongArch: Convert unreachable() to BUG() When compiling on LoongArch, there exists the following objtool warning in arch/loongarch/kernel/machine_kexec.o: kexec_reboot() falls through to next function crash_shutdown_secondary() Avoid using unreachable() as it can (and will in the absence of UBSAN) generate fall-through code. Use BUG() so we get a "break BRK_BUG" trap (with unreachable annotation). Cc: stable@vger.kernel.org # 6.12+ Acked-by: Josh Poimboeuf Signed-off-by: Tiezhu Yang Signed-off-by: Huacai Chen --- arch/loongarch/kernel/machine_kexec.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/loongarch/kernel/machine_kexec.c b/arch/loongarch/kernel/machine_kexec.c index 8ae641dc53bb7..f9381800e291c 100644 --- a/arch/loongarch/kernel/machine_kexec.c +++ b/arch/loongarch/kernel/machine_kexec.c @@ -126,14 +126,14 @@ void kexec_reboot(void) /* All secondary cpus go to kexec_smp_wait */ if (smp_processor_id() > 0) { relocated_kexec_smp_wait(NULL); - unreachable(); + BUG(); } #endif do_kexec = (void *)reboot_code_buffer; do_kexec(efi_boot, cmdline_ptr, systable_ptr, start_addr, first_ind_entry); - unreachable(); + BUG(); } From a0d3c8bcb9206ac207c7ad3182027c6b0a1319bb Mon Sep 17 00:00:00 2001 From: Yuli Wang Date: Sat, 8 Mar 2025 13:51:32 +0800 Subject: [PATCH 2/8] LoongArch: Eliminate superfluous get_numa_distances_cnt() In LoongArch, get_numa_distances_cnt() isn't in use, resulting in a compiler warning. Fix follow errors with clang-18 when W=1e: arch/loongarch/kernel/acpi.c:259:28: error: unused function 'get_numa_distances_cnt' [-Werror,-Wunused-function] 259 | static inline unsigned int get_numa_distances_cnt(struct acpi_table_slit *slit) | ^~~~~~~~~~~~~~~~~~~~~~ 1 error generated. Link: https://lore.kernel.org/all/Z7bHPVUH4lAezk0E@kernel.org/ Signed-off-by: Yuli Wang Signed-off-by: Huacai Chen --- arch/loongarch/kernel/acpi.c | 12 ------------ 1 file changed, 12 deletions(-) diff --git a/arch/loongarch/kernel/acpi.c b/arch/loongarch/kernel/acpi.c index 382a09a7152c3..1120ac2824f6e 100644 --- a/arch/loongarch/kernel/acpi.c +++ b/arch/loongarch/kernel/acpi.c @@ -249,18 +249,6 @@ static __init int setup_node(int pxm) return acpi_map_pxm_to_node(pxm); } -/* - * Callback for SLIT parsing. pxm_to_node() returns NUMA_NO_NODE for - * I/O localities since SRAT does not list them. I/O localities are - * not supported at this point. - */ -unsigned int numa_distance_cnt; - -static inline unsigned int get_numa_distances_cnt(struct acpi_table_slit *slit) -{ - return slit->locality_count; -} - void __init numa_set_distance(int from, int to, int distance) { if ((u8)distance != distance || (from == to && distance != LOCAL_DISTANCE)) { From c9117434c8f7523f0b77db4c5766f5011cc94677 Mon Sep 17 00:00:00 2001 From: Huacai Chen Date: Sat, 8 Mar 2025 13:51:32 +0800 Subject: [PATCH 3/8] LoongArch: Use polling play_dead() when resuming from hibernation When CONFIG_RANDOM_KMALLOC_CACHES or other randomization infrastructrue enabled, the idle_task's stack may different between the booting kernel and target kernel. So when resuming from hibernation, an ACTION_BOOT_CPU IPI wakeup the idle instruction in arch_cpu_idle_dead() and jump to the interrupt handler. But since the stack pointer is changed, the interrupt handler cannot restore correct context. So rename the current arch_cpu_idle_dead() to idle_play_dead(), make it as the default version of play_dead(), and the new arch_cpu_idle_dead() call play_dead() directly. For hibernation, implement an arch-specific hibernate_resume_nonboot_cpu_disable() to use the polling version (idle instruction is replace by nop, and irq is disabled) of play_dead(), i.e. poll_play_dead(), to avoid IPI handler corrupting the idle_task's stack when resuming from hibernation. This solution is a little similar to commit 406f992e4a372dafbe3c ("x86 / hibernate: Use hlt_play_dead() when resuming from hibernation"). Cc: stable@vger.kernel.org Tested-by: Erpeng Xu Tested-by: Yuli Wang Signed-off-by: Huacai Chen --- arch/loongarch/kernel/smp.c | 47 ++++++++++++++++++++++++++++++++++++- 1 file changed, 46 insertions(+), 1 deletion(-) diff --git a/arch/loongarch/kernel/smp.c b/arch/loongarch/kernel/smp.c index fbf747447f13f..4b24589c0b565 100644 --- a/arch/loongarch/kernel/smp.c +++ b/arch/loongarch/kernel/smp.c @@ -19,6 +19,7 @@ #include #include #include +#include #include #include #include @@ -423,7 +424,7 @@ void loongson_cpu_die(unsigned int cpu) mb(); } -void __noreturn arch_cpu_idle_dead(void) +static void __noreturn idle_play_dead(void) { register uint64_t addr; register void (*init_fn)(void); @@ -447,6 +448,50 @@ void __noreturn arch_cpu_idle_dead(void) BUG(); } +#ifdef CONFIG_HIBERNATION +static void __noreturn poll_play_dead(void) +{ + register uint64_t addr; + register void (*init_fn)(void); + + idle_task_exit(); + __this_cpu_write(cpu_state, CPU_DEAD); + + __smp_mb(); + do { + __asm__ __volatile__("nop\n\t"); + addr = iocsr_read64(LOONGARCH_IOCSR_MBUF0); + } while (addr == 0); + + init_fn = (void *)TO_CACHE(addr); + iocsr_write32(0xffffffff, LOONGARCH_IOCSR_IPI_CLEAR); + + init_fn(); + BUG(); +} +#endif + +static void (*play_dead)(void) = idle_play_dead; + +void __noreturn arch_cpu_idle_dead(void) +{ + play_dead(); + BUG(); /* play_dead() doesn't return */ +} + +#ifdef CONFIG_HIBERNATION +int hibernate_resume_nonboot_cpu_disable(void) +{ + int ret; + + play_dead = poll_play_dead; + ret = suspend_disable_secondary_cpus(); + play_dead = idle_play_dead; + + return ret; +} +#endif + #endif /* From c8477bb0a8e7f6b2e47952b403c5cb67a6929e55 Mon Sep 17 00:00:00 2001 From: Bibo Mao Date: Sat, 8 Mar 2025 13:51:32 +0800 Subject: [PATCH 4/8] LoongArch: Set max_pfn with the PFN of the last page The current max_pfn equals to zero. In this case, it causes user cannot get some page information through /proc filesystem such as kpagecount. The following message is displayed by stress-ng test suite with command "stress-ng --verbose --physpage 1 -t 1". # stress-ng --verbose --physpage 1 -t 1 stress-ng: error: [1691] physpage: cannot read page count for address 0x134ac000 in /proc/kpagecount, errno=22 (Invalid argument) stress-ng: error: [1691] physpage: cannot read page count for address 0x7ffff207c3a8 in /proc/kpagecount, errno=22 (Invalid argument) stress-ng: error: [1691] physpage: cannot read page count for address 0x134b0000 in /proc/kpagecount, errno=22 (Invalid argument) ... After applying this patch, the kernel can pass the test. # stress-ng --verbose --physpage 1 -t 1 stress-ng: debug: [1701] physpage: [1701] started (instance 0 on CPU 3) stress-ng: debug: [1701] physpage: [1701] exited (instance 0 on CPU 3) stress-ng: debug: [1700] physpage: [1701] terminated (success) Cc: stable@vger.kernel.org # 6.8+ Fixes: ff6c3d81f2e8 ("NUMA: optimize detection of memory with no node id assigned by firmware") Signed-off-by: Bibo Mao Signed-off-by: Huacai Chen --- arch/loongarch/kernel/setup.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/arch/loongarch/kernel/setup.c b/arch/loongarch/kernel/setup.c index edcfdfcad7d22..90cb3ca96f085 100644 --- a/arch/loongarch/kernel/setup.c +++ b/arch/loongarch/kernel/setup.c @@ -387,6 +387,9 @@ static void __init check_kernel_sections_mem(void) */ static void __init arch_mem_init(char **cmdline_p) { + /* Recalculate max_low_pfn for "mem=xxx" */ + max_pfn = max_low_pfn = PHYS_PFN(memblock_end_of_DRAM()); + if (usermem) pr_info("User-defined physical RAM map overwrite\n"); From 3109d5ff484b7bc7b955f166974c6776d91f247b Mon Sep 17 00:00:00 2001 From: Bibo Mao Date: Sat, 8 Mar 2025 13:51:32 +0800 Subject: [PATCH 5/8] LoongArch: Set hugetlb mmap base address aligned with pmd size With ltp test case "testcases/bin/hugefork02", there is a dmesg error report message such as: kernel BUG at mm/hugetlb.c:5550! Oops - BUG[#1]: CPU: 0 UID: 0 PID: 1517 Comm: hugefork02 Not tainted 6.14.0-rc2+ #241 Hardware name: QEMU QEMU Virtual Machine, BIOS unknown 2/2/2022 pc 90000000004eaf1c ra 9000000000485538 tp 900000010edbc000 sp 900000010edbf940 a0 900000010edbfb00 a1 9000000108d20280 a2 00007fffe9474000 a3 00007ffff3474000 a4 0000000000000000 a5 0000000000000003 a6 00000000003cadd3 a7 0000000000000000 t0 0000000001ffffff t1 0000000001474000 t2 900000010ecd7900 t3 00007fffe9474000 t4 00007fffe9474000 t5 0000000000000040 t6 900000010edbfb00 t7 0000000000000001 t8 0000000000000005 u0 90000000004849d0 s9 900000010edbfa00 s0 9000000108d20280 s1 00007fffe9474000 s2 0000000002000000 s3 9000000108d20280 s4 9000000002b38b10 s5 900000010edbfb00 s6 00007ffff3474000 s7 0000000000000406 s8 900000010edbfa08 ra: 9000000000485538 unmap_vmas+0x130/0x218 ERA: 90000000004eaf1c __unmap_hugepage_range+0x6f4/0x7d0 PRMD: 00000004 (PPLV0 +PIE -PWE) EUEN: 00000007 (+FPE +SXE +ASXE -BTE) ECFG: 00071c1d (LIE=0,2-4,10-12 VS=7) ESTAT: 000c0000 [BRK] (IS= ECode=12 EsubCode=0) PRID: 0014c010 (Loongson-64bit, Loongson-3A5000) Process hugefork02 (pid: 1517, threadinfo=00000000a670eaf4, task=000000007a95fc64) Call Trace: [<90000000004eaf1c>] __unmap_hugepage_range+0x6f4/0x7d0 [<9000000000485534>] unmap_vmas+0x12c/0x218 [<9000000000494068>] exit_mmap+0xe0/0x308 [<900000000025fdc4>] mmput+0x74/0x180 [<900000000026a284>] do_exit+0x294/0x898 [<900000000026aa30>] do_group_exit+0x30/0x98 [<900000000027bed4>] get_signal+0x83c/0x868 [<90000000002457b4>] arch_do_signal_or_restart+0x54/0xfa0 [<90000000015795e8>] irqentry_exit_to_user_mode+0xb8/0x138 [<90000000002572d0>] tlb_do_page_fault_1+0x114/0x1b4 The problem is that base address allocated from hugetlbfs is not aligned with pmd size. Here add a checking for hugetlbfs and align base address with pmd size. After this patch the test case "testcases/bin/hugefork02" passes to run. This is similar to the commit 7f24cbc9c4d42db8a3c8484d1 ("mm/mmap: teach generic_get_unmapped_area{_topdown} to handle hugetlb mappings"). Cc: stable@vger.kernel.org # 6.13+ Signed-off-by: Bibo Mao Signed-off-by: Huacai Chen --- arch/loongarch/mm/mmap.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/arch/loongarch/mm/mmap.c b/arch/loongarch/mm/mmap.c index 914e82ff3f656..1df9e99582cc6 100644 --- a/arch/loongarch/mm/mmap.c +++ b/arch/loongarch/mm/mmap.c @@ -3,6 +3,7 @@ * Copyright (C) 2020-2022 Loongson Technology Corporation Limited */ #include +#include #include #include #include @@ -63,8 +64,11 @@ static unsigned long arch_get_unmapped_area_common(struct file *filp, } info.length = len; - info.align_mask = do_color_align ? (PAGE_MASK & SHM_ALIGN_MASK) : 0; info.align_offset = pgoff << PAGE_SHIFT; + if (filp && is_file_hugepages(filp)) + info.align_mask = huge_page_mask_align(filp); + else + info.align_mask = do_color_align ? (PAGE_MASK & SHM_ALIGN_MASK) : 0; if (dir == DOWN) { info.flags = VM_UNMAPPED_AREA_TOPDOWN; From 6fb1867d5a44b0a061cf39d2492d23d314bcb8ce Mon Sep 17 00:00:00 2001 From: Bibo Mao Date: Sat, 8 Mar 2025 13:51:59 +0800 Subject: [PATCH 6/8] LoongArch: KVM: Add interrupt checking for AVEC There is a newly added macro INT_AVEC with CSR ESTAT register, which is bit 14 used for LoongArch AVEC support. AVEC interrupt status bit 14 is supported with macro CSR_ESTAT_IS, so here replace the hard-coded value 0x1fff with macro CSR_ESTAT_IS so that the AVEC interrupt status is also supported by KVM. Cc: stable@vger.kernel.org Signed-off-by: Bibo Mao Signed-off-by: Huacai Chen --- arch/loongarch/kvm/vcpu.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/loongarch/kvm/vcpu.c b/arch/loongarch/kvm/vcpu.c index 20f941af3e9ea..9e1a9b4aa4c6a 100644 --- a/arch/loongarch/kvm/vcpu.c +++ b/arch/loongarch/kvm/vcpu.c @@ -311,7 +311,7 @@ static int kvm_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu) { int ret = RESUME_GUEST; unsigned long estat = vcpu->arch.host_estat; - u32 intr = estat & 0x1fff; /* Ignore NMI */ + u32 intr = estat & CSR_ESTAT_IS; u32 ecode = (estat & CSR_ESTAT_EXC) >> CSR_ESTAT_EXC_SHIFT; vcpu->mode = OUTSIDE_GUEST_MODE; From 78d7bc5a02e1468df53896df354fa80727f35b7d Mon Sep 17 00:00:00 2001 From: Bibo Mao Date: Sat, 8 Mar 2025 13:52:01 +0800 Subject: [PATCH 7/8] LoongArch: KVM: Reload guest CSR registers after sleep On host, the HW guest CSR registers are lost after suspend and resume operation. Since last_vcpu of boot CPU still records latest vCPU pointer so that the guest CSR register skips to reload when boot CPU resumes and vCPU is scheduled. Here last_vcpu is cleared so that guest CSR registers will reload from scheduled vCPU context after suspend and resume. Cc: stable@vger.kernel.org Signed-off-by: Bibo Mao Signed-off-by: Huacai Chen --- arch/loongarch/kvm/main.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/arch/loongarch/kvm/main.c b/arch/loongarch/kvm/main.c index f6d3242b9234a..b6864d6e5ec8d 100644 --- a/arch/loongarch/kvm/main.c +++ b/arch/loongarch/kvm/main.c @@ -317,6 +317,13 @@ int kvm_arch_enable_virtualization_cpu(void) kvm_debug("GCFG:%lx GSTAT:%lx GINTC:%lx GTLBC:%lx", read_csr_gcfg(), read_csr_gstat(), read_csr_gintc(), read_csr_gtlbc()); + /* + * HW Guest CSR registers are lost after CPU suspend and resume. + * Clear last_vcpu so that Guest CSR registers forced to reload + * from vCPU SW state. + */ + this_cpu_ptr(vmcs)->last_vcpu = NULL; + return 0; } From 6bdbb73dc8d99fbb77f5db79dbb6f108708090b4 Mon Sep 17 00:00:00 2001 From: Bibo Mao Date: Sat, 8 Mar 2025 13:52:04 +0800 Subject: [PATCH 8/8] LoongArch: KVM: Fix GPA size issue about VM Physical address space is 48 bit on Loongson-3A5000 physical machine, however it is 47 bit for VM on Loongson-3A5000 system. Size of physical address space of VM is the same with the size of virtual user space (a half) of physical machine. Variable cpu_vabits represents user address space, kernel address space is not included (user space and kernel space are both a half of total). Here cpu_vabits, rather than cpu_vabits - 1, is to represent the size of guest physical address space. Also there is strict checking about page fault GPA address, inject error if it is larger than maximum GPA address of VM. Cc: stable@vger.kernel.org Signed-off-by: Bibo Mao Signed-off-by: Huacai Chen --- arch/loongarch/kvm/exit.c | 6 ++++++ arch/loongarch/kvm/vm.c | 6 +++++- 2 files changed, 11 insertions(+), 1 deletion(-) diff --git a/arch/loongarch/kvm/exit.c b/arch/loongarch/kvm/exit.c index c1e8ec5b941b2..ea321403644ad 100644 --- a/arch/loongarch/kvm/exit.c +++ b/arch/loongarch/kvm/exit.c @@ -669,6 +669,12 @@ static int kvm_handle_rdwr_fault(struct kvm_vcpu *vcpu, bool write) struct kvm_run *run = vcpu->run; unsigned long badv = vcpu->arch.badv; + /* Inject ADE exception if exceed max GPA size */ + if (unlikely(badv >= vcpu->kvm->arch.gpa_size)) { + kvm_queue_exception(vcpu, EXCCODE_ADE, EXSUBCODE_ADEM); + return RESUME_GUEST; + } + ret = kvm_handle_mm_fault(vcpu, badv, write); if (ret) { /* Treat as MMIO */ diff --git a/arch/loongarch/kvm/vm.c b/arch/loongarch/kvm/vm.c index b8b3e1972d6ea..edccfc8c9cd80 100644 --- a/arch/loongarch/kvm/vm.c +++ b/arch/loongarch/kvm/vm.c @@ -48,7 +48,11 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type) if (kvm_pvtime_supported()) kvm->arch.pv_features |= BIT(KVM_FEATURE_STEAL_TIME); - kvm->arch.gpa_size = BIT(cpu_vabits - 1); + /* + * cpu_vabits means user address space only (a half of total). + * GPA size of VM is the same with the size of user address space. + */ + kvm->arch.gpa_size = BIT(cpu_vabits); kvm->arch.root_level = CONFIG_PGTABLE_LEVELS - 1; kvm->arch.invalid_ptes[0] = 0; kvm->arch.invalid_ptes[1] = (unsigned long)invalid_pte_table;