diff --git a/Documentation/admin-guide/pm/cpufreq.rst b/Documentation/admin-guide/pm/cpufreq.rst index a21369eba034d..3950583f2b154 100644 --- a/Documentation/admin-guide/pm/cpufreq.rst +++ b/Documentation/admin-guide/pm/cpufreq.rst @@ -248,6 +248,20 @@ are the following: If that frequency cannot be determined, this attribute should not be present. +``cpuinfo_avg_freq`` + An average frequency (in kHz) of all CPUs belonging to a given policy, + derived from hardware-provided feedback and reported on a time frame + spanning at most a few milliseconds. + + This is expected to be based on the frequency the hardware actually runs + at and, as such, might require specialised hardware support (such as the AMU + extension on ARM). If one cannot be determined, this attribute should + not be present. + + Note that a failed attempt to retrieve the current frequency for a given + CPU will result in an appropriate error, e.g. EAGAIN for a CPU that + remains idle (raised on ARM). + ``cpuinfo_max_freq`` Maximum possible operating frequency the CPUs belonging to this policy can run at (in kHz). @@ -293,7 +307,8 @@ are the following: Some architectures (e.g. ``x86``) may attempt to provide information more precisely reflecting the current CPU frequency through this attribute, but that still may not be the exact current CPU frequency as - seen by the hardware at the moment. + seen by the hardware at the moment. This behavior, though, is only + available via the :c:macro:`CPUFREQ_ARCH_CUR_FREQ` option. ``scaling_driver`` The scaling driver currently in use. 
diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index 940343beb3d4c..c8f48945cc09b 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -323,7 +323,7 @@ config ARCH_MMAP_RND_BITS_MIN default 18 # max bits determined by the following formula: -# VA_BITS - PAGE_SHIFT - 3 +# VA_BITS - PTDESC_TABLE_SHIFT config ARCH_MMAP_RND_BITS_MAX default 19 if ARM64_VA_BITS=36 default 24 if ARM64_VA_BITS=39 diff --git a/arch/arm64/include/asm/asm-extable.h b/arch/arm64/include/asm/asm-extable.h index b8a5861dc7b77..292f2687a12e5 100644 --- a/arch/arm64/include/asm/asm-extable.h +++ b/arch/arm64/include/asm/asm-extable.h @@ -9,7 +9,8 @@ #define EX_TYPE_BPF 1 #define EX_TYPE_UACCESS_ERR_ZERO 2 #define EX_TYPE_KACCESS_ERR_ZERO 3 -#define EX_TYPE_LOAD_UNALIGNED_ZEROPAD 4 +#define EX_TYPE_UACCESS_CPY 4 +#define EX_TYPE_LOAD_UNALIGNED_ZEROPAD 5 /* Data fields for EX_TYPE_UACCESS_ERR_ZERO */ #define EX_DATA_REG_ERR_SHIFT 0 @@ -23,6 +24,9 @@ #define EX_DATA_REG_ADDR_SHIFT 5 #define EX_DATA_REG_ADDR GENMASK(9, 5) +/* Data fields for EX_TYPE_UACCESS_CPY */ +#define EX_DATA_UACCESS_WRITE BIT(0) + #ifdef __ASSEMBLY__ #define __ASM_EXTABLE_RAW(insn, fixup, type, data) \ @@ -69,6 +73,10 @@ .endif .endm + .macro _asm_extable_uaccess_cpy, insn, fixup, uaccess_is_write + __ASM_EXTABLE_RAW(\insn, \fixup, EX_TYPE_UACCESS_CPY, \uaccess_is_write) + .endm + #else /* __ASSEMBLY__ */ #include diff --git a/arch/arm64/include/asm/asm-uaccess.h b/arch/arm64/include/asm/asm-uaccess.h index 5b6efe8abeeb4..9148f5a319681 100644 --- a/arch/arm64/include/asm/asm-uaccess.h +++ b/arch/arm64/include/asm/asm-uaccess.h @@ -61,6 +61,10 @@ alternative_else_nop_endif 9999: x; \ _asm_extable_uaccess 9999b, l +#define USER_CPY(l, uaccess_is_write, x...) \ +9999: x; \ + _asm_extable_uaccess_cpy 9999b, l, uaccess_is_write + /* * Generate the assembly for LDTR/STTR with exception table entries. 
* This is complicated as there is no post-increment or pair versions of the diff --git a/arch/arm64/include/asm/cputype.h b/arch/arm64/include/asm/cputype.h index 6f3f4142e214f..20284bf34722e 100644 --- a/arch/arm64/include/asm/cputype.h +++ b/arch/arm64/include/asm/cputype.h @@ -75,6 +75,7 @@ #define ARM_CPU_PART_CORTEX_A76 0xD0B #define ARM_CPU_PART_NEOVERSE_N1 0xD0C #define ARM_CPU_PART_CORTEX_A77 0xD0D +#define ARM_CPU_PART_CORTEX_A76AE 0xD0E #define ARM_CPU_PART_NEOVERSE_V1 0xD40 #define ARM_CPU_PART_CORTEX_A78 0xD41 #define ARM_CPU_PART_CORTEX_A78AE 0xD42 @@ -119,6 +120,7 @@ #define QCOM_CPU_PART_KRYO 0x200 #define QCOM_CPU_PART_KRYO_2XX_GOLD 0x800 #define QCOM_CPU_PART_KRYO_2XX_SILVER 0x801 +#define QCOM_CPU_PART_KRYO_3XX_GOLD 0x802 #define QCOM_CPU_PART_KRYO_3XX_SILVER 0x803 #define QCOM_CPU_PART_KRYO_4XX_GOLD 0x804 #define QCOM_CPU_PART_KRYO_4XX_SILVER 0x805 @@ -159,6 +161,7 @@ #define MIDR_CORTEX_A76 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A76) #define MIDR_NEOVERSE_N1 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_NEOVERSE_N1) #define MIDR_CORTEX_A77 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A77) +#define MIDR_CORTEX_A76AE MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A76AE) #define MIDR_NEOVERSE_V1 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_NEOVERSE_V1) #define MIDR_CORTEX_A78 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A78) #define MIDR_CORTEX_A78AE MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A78AE) @@ -196,10 +199,21 @@ #define MIDR_QCOM_KRYO MIDR_CPU_MODEL(ARM_CPU_IMP_QCOM, QCOM_CPU_PART_KRYO) #define MIDR_QCOM_KRYO_2XX_GOLD MIDR_CPU_MODEL(ARM_CPU_IMP_QCOM, QCOM_CPU_PART_KRYO_2XX_GOLD) #define MIDR_QCOM_KRYO_2XX_SILVER MIDR_CPU_MODEL(ARM_CPU_IMP_QCOM, QCOM_CPU_PART_KRYO_2XX_SILVER) +#define MIDR_QCOM_KRYO_3XX_GOLD MIDR_CPU_MODEL(ARM_CPU_IMP_QCOM, QCOM_CPU_PART_KRYO_3XX_GOLD) #define MIDR_QCOM_KRYO_3XX_SILVER MIDR_CPU_MODEL(ARM_CPU_IMP_QCOM, QCOM_CPU_PART_KRYO_3XX_SILVER) #define MIDR_QCOM_KRYO_4XX_GOLD 
MIDR_CPU_MODEL(ARM_CPU_IMP_QCOM, QCOM_CPU_PART_KRYO_4XX_GOLD) #define MIDR_QCOM_KRYO_4XX_SILVER MIDR_CPU_MODEL(ARM_CPU_IMP_QCOM, QCOM_CPU_PART_KRYO_4XX_SILVER) #define MIDR_QCOM_ORYON_X1 MIDR_CPU_MODEL(ARM_CPU_IMP_QCOM, QCOM_CPU_PART_ORYON_X1) + +/* + * NOTES: + * - Qualcomm Kryo 5XX Prime / Gold ID themselves as MIDR_CORTEX_A77 + * - Qualcomm Kryo 5XX Silver IDs itself as MIDR_QCOM_KRYO_4XX_SILVER + * - Qualcomm Kryo 6XX Prime IDs itself as MIDR_CORTEX_X1 + * - Qualcomm Kryo 6XX Gold IDs itself as MIDR_CORTEX_A78 + * - Qualcomm Kryo 6XX Silver IDs itself as MIDR_CORTEX_A55 + */ + #define MIDR_NVIDIA_DENVER MIDR_CPU_MODEL(ARM_CPU_IMP_NVIDIA, NVIDIA_CPU_PART_DENVER) #define MIDR_NVIDIA_CARMEL MIDR_CPU_MODEL(ARM_CPU_IMP_NVIDIA, NVIDIA_CPU_PART_CARMEL) #define MIDR_FUJITSU_A64FX MIDR_CPU_MODEL(ARM_CPU_IMP_FUJITSU, FUJITSU_CPU_PART_A64FX) diff --git a/arch/arm64/include/asm/extable.h b/arch/arm64/include/asm/extable.h index 72b0e71cc3de8..9dc39612bdf53 100644 --- a/arch/arm64/include/asm/extable.h +++ b/arch/arm64/include/asm/extable.h @@ -33,6 +33,8 @@ do { \ (b)->data = (tmp).data; \ } while (0) +bool insn_may_access_user(unsigned long addr, unsigned long esr); + #ifdef CONFIG_BPF_JIT bool ex_handler_bpf(const struct exception_table_entry *ex, struct pt_regs *regs); @@ -45,5 +47,5 @@ bool ex_handler_bpf(const struct exception_table_entry *ex, } #endif /* !CONFIG_BPF_JIT */ -bool fixup_exception(struct pt_regs *regs); +bool fixup_exception(struct pt_regs *regs, unsigned long esr); #endif diff --git a/arch/arm64/include/asm/fpsimd.h b/arch/arm64/include/asm/fpsimd.h index f2a84efc36185..564bc09b3e06d 100644 --- a/arch/arm64/include/asm/fpsimd.h +++ b/arch/arm64/include/asm/fpsimd.h @@ -80,7 +80,6 @@ extern void fpsimd_signal_preserve_current_state(void); extern void fpsimd_preserve_current_state(void); extern void fpsimd_restore_current_state(void); extern void fpsimd_update_current_state(struct user_fpsimd_state const *state); -extern void 
fpsimd_kvm_prepare(void); struct cpu_fp_state { struct user_fpsimd_state *st; diff --git a/arch/arm64/include/asm/kernel-pgtable.h b/arch/arm64/include/asm/kernel-pgtable.h index fd5a08450b12a..9e93733523f68 100644 --- a/arch/arm64/include/asm/kernel-pgtable.h +++ b/arch/arm64/include/asm/kernel-pgtable.h @@ -45,11 +45,11 @@ #define SPAN_NR_ENTRIES(vstart, vend, shift) \ ((((vend) - 1) >> (shift)) - ((vstart) >> (shift)) + 1) -#define EARLY_ENTRIES(vstart, vend, shift, add) \ - (SPAN_NR_ENTRIES(vstart, vend, shift) + (add)) +#define EARLY_ENTRIES(lvl, vstart, vend) \ + SPAN_NR_ENTRIES(vstart, vend, SWAPPER_BLOCK_SHIFT + lvl * PTDESC_TABLE_SHIFT) -#define EARLY_LEVEL(lvl, lvls, vstart, vend, add) \ - (lvls > lvl ? EARLY_ENTRIES(vstart, vend, SWAPPER_BLOCK_SHIFT + lvl * (PAGE_SHIFT - 3), add) : 0) +#define EARLY_LEVEL(lvl, lvls, vstart, vend, add) \ + ((lvls) > (lvl) ? EARLY_ENTRIES(lvl, vstart, vend) + (add) : 0) #define EARLY_PAGES(lvls, vstart, vend, add) (1 /* PGDIR page */ \ + EARLY_LEVEL(3, (lvls), (vstart), (vend), add) /* each entry needs a next level page table */ \ diff --git a/arch/arm64/include/asm/mem_encrypt.h b/arch/arm64/include/asm/mem_encrypt.h index f8f78f622dd2c..a2a1eeb36d4b5 100644 --- a/arch/arm64/include/asm/mem_encrypt.h +++ b/arch/arm64/include/asm/mem_encrypt.h @@ -21,4 +21,15 @@ static inline bool force_dma_unencrypted(struct device *dev) return is_realm_world(); } +/* + * For Arm CCA guests, canonical addresses are "encrypted", so no changes + * required for dma_addr_encrypted(). + * The unencrypted DMA buffers must be accessed via the unprotected IPA, + * "top IPA bit" set. 
+ */ +#define dma_addr_unencrypted(x) ((x) | PROT_NS_SHARED) + +/* Clear the "top" IPA bit while converting back */ +#define dma_addr_canonical(x) ((x) & ~PROT_NS_SHARED) + #endif /* __ASM_MEM_ENCRYPT_H */ diff --git a/arch/arm64/include/asm/pgtable-hwdef.h b/arch/arm64/include/asm/pgtable-hwdef.h index a9136cc551ccb..f3b77deedfa2c 100644 --- a/arch/arm64/include/asm/pgtable-hwdef.h +++ b/arch/arm64/include/asm/pgtable-hwdef.h @@ -7,40 +7,46 @@ #include +#define PTDESC_ORDER 3 + +/* Number of VA bits resolved by a single translation table level */ +#define PTDESC_TABLE_SHIFT (PAGE_SHIFT - PTDESC_ORDER) + /* * Number of page-table levels required to address 'va_bits' wide * address, without section mapping. We resolve the top (va_bits - PAGE_SHIFT) - * bits with (PAGE_SHIFT - 3) bits at each page table level. Hence: + * bits with PTDESC_TABLE_SHIFT bits at each page table level. Hence: * - * levels = DIV_ROUND_UP((va_bits - PAGE_SHIFT), (PAGE_SHIFT - 3)) + * levels = DIV_ROUND_UP((va_bits - PAGE_SHIFT), PTDESC_TABLE_SHIFT) * * where DIV_ROUND_UP(n, d) => (((n) + (d) - 1) / (d)) * * We cannot include linux/kernel.h which defines DIV_ROUND_UP here * due to build issues. So we open code DIV_ROUND_UP here: * - * ((((va_bits) - PAGE_SHIFT) + (PAGE_SHIFT - 3) - 1) / (PAGE_SHIFT - 3)) + * ((((va_bits) - PAGE_SHIFT) + PTDESC_TABLE_SHIFT - 1) / PTDESC_TABLE_SHIFT) * * which gets simplified as : */ -#define ARM64_HW_PGTABLE_LEVELS(va_bits) (((va_bits) - 4) / (PAGE_SHIFT - 3)) +#define ARM64_HW_PGTABLE_LEVELS(va_bits) \ + (((va_bits) - PTDESC_ORDER - 1) / PTDESC_TABLE_SHIFT) /* * Size mapped by an entry at level n ( -1 <= n <= 3) - * We map (PAGE_SHIFT - 3) at all translation levels and PAGE_SHIFT bits + * We map PTDESC_TABLE_SHIFT at all translation levels and PAGE_SHIFT bits * in the final page. The maximum number of translation levels supported by * the architecture is 5. 
Hence, starting at level n, we have further * ((4 - n) - 1) levels of translation excluding the offset within the page. * So, the total number of bits mapped by an entry at level n is : * - * ((4 - n) - 1) * (PAGE_SHIFT - 3) + PAGE_SHIFT + * ((4 - n) - 1) * PTDESC_TABLE_SHIFT + PAGE_SHIFT * * Rearranging it a bit we get : - * (4 - n) * (PAGE_SHIFT - 3) + 3 + * (4 - n) * PTDESC_TABLE_SHIFT + PTDESC_ORDER */ -#define ARM64_HW_PGTABLE_LEVEL_SHIFT(n) ((PAGE_SHIFT - 3) * (4 - (n)) + 3) +#define ARM64_HW_PGTABLE_LEVEL_SHIFT(n) (PTDESC_TABLE_SHIFT * (4 - (n)) + PTDESC_ORDER) -#define PTRS_PER_PTE (1 << (PAGE_SHIFT - 3)) +#define PTRS_PER_PTE (1 << PTDESC_TABLE_SHIFT) /* * PMD_SHIFT determines the size a level 2 page table entry can map. @@ -49,7 +55,7 @@ #define PMD_SHIFT ARM64_HW_PGTABLE_LEVEL_SHIFT(2) #define PMD_SIZE (_AC(1, UL) << PMD_SHIFT) #define PMD_MASK (~(PMD_SIZE-1)) -#define PTRS_PER_PMD (1 << (PAGE_SHIFT - 3)) +#define PTRS_PER_PMD (1 << PTDESC_TABLE_SHIFT) #endif /* @@ -59,14 +65,14 @@ #define PUD_SHIFT ARM64_HW_PGTABLE_LEVEL_SHIFT(1) #define PUD_SIZE (_AC(1, UL) << PUD_SHIFT) #define PUD_MASK (~(PUD_SIZE-1)) -#define PTRS_PER_PUD (1 << (PAGE_SHIFT - 3)) +#define PTRS_PER_PUD (1 << PTDESC_TABLE_SHIFT) #endif #if CONFIG_PGTABLE_LEVELS > 4 #define P4D_SHIFT ARM64_HW_PGTABLE_LEVEL_SHIFT(0) #define P4D_SIZE (_AC(1, UL) << P4D_SHIFT) #define P4D_MASK (~(P4D_SIZE-1)) -#define PTRS_PER_P4D (1 << (PAGE_SHIFT - 3)) +#define PTRS_PER_P4D (1 << PTDESC_TABLE_SHIFT) #endif /* @@ -97,7 +103,6 @@ * Level -1 descriptor (PGD). */ #define PGD_TYPE_TABLE (_AT(pgdval_t, 3) << 0) -#define PGD_TABLE_BIT (_AT(pgdval_t, 1) << 1) #define PGD_TYPE_MASK (_AT(pgdval_t, 3) << 0) #define PGD_TABLE_AF (_AT(pgdval_t, 1) << 10) /* Ignored if no FEAT_HAFT */ #define PGD_TABLE_PXN (_AT(pgdval_t, 1) << 59) @@ -107,7 +112,6 @@ * Level 0 descriptor (P4D). 
*/ #define P4D_TYPE_TABLE (_AT(p4dval_t, 3) << 0) -#define P4D_TABLE_BIT (_AT(p4dval_t, 1) << 1) #define P4D_TYPE_MASK (_AT(p4dval_t, 3) << 0) #define P4D_TYPE_SECT (_AT(p4dval_t, 1) << 0) #define P4D_SECT_RDONLY (_AT(p4dval_t, 1) << 7) /* AP[2] */ @@ -119,7 +123,6 @@ * Level 1 descriptor (PUD). */ #define PUD_TYPE_TABLE (_AT(pudval_t, 3) << 0) -#define PUD_TABLE_BIT (_AT(pudval_t, 1) << 1) #define PUD_TYPE_MASK (_AT(pudval_t, 3) << 0) #define PUD_TYPE_SECT (_AT(pudval_t, 1) << 0) #define PUD_SECT_RDONLY (_AT(pudval_t, 1) << 7) /* AP[2] */ @@ -133,7 +136,6 @@ #define PMD_TYPE_MASK (_AT(pmdval_t, 3) << 0) #define PMD_TYPE_TABLE (_AT(pmdval_t, 3) << 0) #define PMD_TYPE_SECT (_AT(pmdval_t, 1) << 0) -#define PMD_TABLE_BIT (_AT(pmdval_t, 1) << 1) #define PMD_TABLE_AF (_AT(pmdval_t, 1) << 10) /* Ignored if no FEAT_HAFT */ /* @@ -162,7 +164,6 @@ #define PTE_VALID (_AT(pteval_t, 1) << 0) #define PTE_TYPE_MASK (_AT(pteval_t, 3) << 0) #define PTE_TYPE_PAGE (_AT(pteval_t, 3) << 0) -#define PTE_TABLE_BIT (_AT(pteval_t, 1) << 1) #define PTE_USER (_AT(pteval_t, 1) << 6) /* AP[1] */ #define PTE_RDONLY (_AT(pteval_t, 1) << 7) /* AP[2] */ #define PTE_SHARED (_AT(pteval_t, 3) << 8) /* SH[1:0], inner shareable */ diff --git a/arch/arm64/include/asm/pgtable-prot.h b/arch/arm64/include/asm/pgtable-prot.h index a95f1f77bb39a..7830d031742e5 100644 --- a/arch/arm64/include/asm/pgtable-prot.h +++ b/arch/arm64/include/asm/pgtable-prot.h @@ -169,25 +169,25 @@ static inline bool __pure lpa2_is_enabled(void) #define PAGE_GCS_RO __pgprot(_PAGE_GCS_RO) #define PIE_E0 ( \ - PIRx_ELx_PERM(pte_pi_index(_PAGE_GCS), PIE_GCS) | \ - PIRx_ELx_PERM(pte_pi_index(_PAGE_GCS_RO), PIE_R) | \ - PIRx_ELx_PERM(pte_pi_index(_PAGE_EXECONLY), PIE_X_O) | \ - PIRx_ELx_PERM(pte_pi_index(_PAGE_READONLY_EXEC), PIE_RX_O) | \ - PIRx_ELx_PERM(pte_pi_index(_PAGE_SHARED_EXEC), PIE_RWX_O) | \ - PIRx_ELx_PERM(pte_pi_index(_PAGE_READONLY), PIE_R_O) | \ - PIRx_ELx_PERM(pte_pi_index(_PAGE_SHARED), PIE_RW_O)) + 
PIRx_ELx_PERM_PREP(pte_pi_index(_PAGE_GCS), PIE_GCS) | \ + PIRx_ELx_PERM_PREP(pte_pi_index(_PAGE_GCS_RO), PIE_R) | \ + PIRx_ELx_PERM_PREP(pte_pi_index(_PAGE_EXECONLY), PIE_X_O) | \ + PIRx_ELx_PERM_PREP(pte_pi_index(_PAGE_READONLY_EXEC), PIE_RX_O) | \ + PIRx_ELx_PERM_PREP(pte_pi_index(_PAGE_SHARED_EXEC), PIE_RWX_O) | \ + PIRx_ELx_PERM_PREP(pte_pi_index(_PAGE_READONLY), PIE_R_O) | \ + PIRx_ELx_PERM_PREP(pte_pi_index(_PAGE_SHARED), PIE_RW_O)) #define PIE_E1 ( \ - PIRx_ELx_PERM(pte_pi_index(_PAGE_GCS), PIE_NONE_O) | \ - PIRx_ELx_PERM(pte_pi_index(_PAGE_GCS_RO), PIE_NONE_O) | \ - PIRx_ELx_PERM(pte_pi_index(_PAGE_EXECONLY), PIE_NONE_O) | \ - PIRx_ELx_PERM(pte_pi_index(_PAGE_READONLY_EXEC), PIE_R) | \ - PIRx_ELx_PERM(pte_pi_index(_PAGE_SHARED_EXEC), PIE_RW) | \ - PIRx_ELx_PERM(pte_pi_index(_PAGE_READONLY), PIE_R) | \ - PIRx_ELx_PERM(pte_pi_index(_PAGE_SHARED), PIE_RW) | \ - PIRx_ELx_PERM(pte_pi_index(_PAGE_KERNEL_ROX), PIE_RX) | \ - PIRx_ELx_PERM(pte_pi_index(_PAGE_KERNEL_EXEC), PIE_RWX) | \ - PIRx_ELx_PERM(pte_pi_index(_PAGE_KERNEL_RO), PIE_R) | \ - PIRx_ELx_PERM(pte_pi_index(_PAGE_KERNEL), PIE_RW)) + PIRx_ELx_PERM_PREP(pte_pi_index(_PAGE_GCS), PIE_NONE_O) | \ + PIRx_ELx_PERM_PREP(pte_pi_index(_PAGE_GCS_RO), PIE_NONE_O) | \ + PIRx_ELx_PERM_PREP(pte_pi_index(_PAGE_EXECONLY), PIE_NONE_O) | \ + PIRx_ELx_PERM_PREP(pte_pi_index(_PAGE_READONLY_EXEC), PIE_R) | \ + PIRx_ELx_PERM_PREP(pte_pi_index(_PAGE_SHARED_EXEC), PIE_RW) | \ + PIRx_ELx_PERM_PREP(pte_pi_index(_PAGE_READONLY), PIE_R) | \ + PIRx_ELx_PERM_PREP(pte_pi_index(_PAGE_SHARED), PIE_RW) | \ + PIRx_ELx_PERM_PREP(pte_pi_index(_PAGE_KERNEL_ROX), PIE_RX) | \ + PIRx_ELx_PERM_PREP(pte_pi_index(_PAGE_KERNEL_EXEC), PIE_RWX) | \ + PIRx_ELx_PERM_PREP(pte_pi_index(_PAGE_KERNEL_RO), PIE_R) | \ + PIRx_ELx_PERM_PREP(pte_pi_index(_PAGE_KERNEL), PIE_RW)) #endif /* __ASM_PGTABLE_PROT_H */ diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h index 0b2a2ad1b9e83..84f05f781a70d 100644 --- 
a/arch/arm64/include/asm/pgtable.h +++ b/arch/arm64/include/asm/pgtable.h @@ -68,10 +68,6 @@ extern unsigned long empty_zero_page[PAGE_SIZE / sizeof(unsigned long)]; #define pte_ERROR(e) \ pr_err("%s:%d: bad pte %016llx.\n", __FILE__, __LINE__, pte_val(e)) -/* - * Macros to convert between a physical address and its placement in a - * page table entry, taking care of 52-bit addresses. - */ #ifdef CONFIG_ARM64_PA_BITS_52 static inline phys_addr_t __pte_to_phys(pte_t pte) { @@ -84,8 +80,15 @@ static inline pteval_t __phys_to_pte_val(phys_addr_t phys) return (phys | (phys >> PTE_ADDR_HIGH_SHIFT)) & PHYS_TO_PTE_ADDR_MASK; } #else -#define __pte_to_phys(pte) (pte_val(pte) & PTE_ADDR_LOW) -#define __phys_to_pte_val(phys) (phys) +static inline phys_addr_t __pte_to_phys(pte_t pte) +{ + return pte_val(pte) & PTE_ADDR_LOW; +} + +static inline pteval_t __phys_to_pte_val(phys_addr_t phys) +{ + return phys; +} #endif #define pte_pfn(pte) (__pte_to_phys(pte) >> PAGE_SHIFT) @@ -483,12 +486,12 @@ static inline pmd_t pte_pmd(pte_t pte) static inline pgprot_t mk_pud_sect_prot(pgprot_t prot) { - return __pgprot((pgprot_val(prot) & ~PUD_TABLE_BIT) | PUD_TYPE_SECT); + return __pgprot((pgprot_val(prot) & ~PUD_TYPE_MASK) | PUD_TYPE_SECT); } static inline pgprot_t mk_pmd_sect_prot(pgprot_t prot) { - return __pgprot((pgprot_val(prot) & ~PMD_TABLE_BIT) | PMD_TYPE_SECT); + return __pgprot((pgprot_val(prot) & ~PMD_TYPE_MASK) | PMD_TYPE_SECT); } static inline pte_t pte_swp_mkexclusive(pte_t pte) @@ -548,18 +551,6 @@ static inline int pmd_protnone(pmd_t pmd) #endif #define pmd_present(pmd) pte_present(pmd_pte(pmd)) - -/* - * THP definitions. 
- */ - -#ifdef CONFIG_TRANSPARENT_HUGEPAGE -static inline int pmd_trans_huge(pmd_t pmd) -{ - return pmd_val(pmd) && pmd_present(pmd) && !(pmd_val(pmd) & PMD_TABLE_BIT); -} -#endif /* CONFIG_TRANSPARENT_HUGEPAGE */ - #define pmd_dirty(pmd) pte_dirty(pmd_pte(pmd)) #define pmd_young(pmd) pte_young(pmd_pte(pmd)) #define pmd_valid(pmd) pte_valid(pmd_pte(pmd)) @@ -585,7 +576,18 @@ static inline int pmd_trans_huge(pmd_t pmd) #define pmd_write(pmd) pte_write(pmd_pte(pmd)) -#define pmd_mkhuge(pmd) (__pmd(pmd_val(pmd) & ~PMD_TABLE_BIT)) +static inline pmd_t pmd_mkhuge(pmd_t pmd) +{ + /* + * It's possible that the pmd is present-invalid on entry + * and in that case it needs to remain present-invalid on + * exit. So ensure the VALID bit does not get modified. + */ + pmdval_t mask = PMD_TYPE_MASK & ~PTE_VALID; + pmdval_t val = PMD_TYPE_SECT & ~PTE_VALID; + + return __pmd((pmd_val(pmd) & ~mask) | val); +} #ifdef CONFIG_TRANSPARENT_HUGEPAGE #define pmd_devmap(pmd) pte_devmap(pmd_pte(pmd)) @@ -613,7 +615,18 @@ static inline pmd_t pmd_mkspecial(pmd_t pmd) #define pud_mkyoung(pud) pte_pud(pte_mkyoung(pud_pte(pud))) #define pud_write(pud) pte_write(pud_pte(pud)) -#define pud_mkhuge(pud) (__pud(pud_val(pud) & ~PUD_TABLE_BIT)) +static inline pud_t pud_mkhuge(pud_t pud) +{ + /* + * It's possible that the pud is present-invalid on entry + * and in that case it needs to remain present-invalid on + * exit. So ensure the VALID bit does not get modified. + */ + pudval_t mask = PUD_TYPE_MASK & ~PTE_VALID; + pudval_t val = PUD_TYPE_SECT & ~PTE_VALID; + + return __pud((pud_val(pud) & ~mask) | val); +} #define __pud_to_phys(pud) __pte_to_phys(pud_pte(pud)) #define __phys_to_pud_val(phys) __phys_to_pte_val(phys) @@ -724,6 +737,18 @@ extern pgprot_t phys_mem_access_prot(struct file *file, unsigned long pfn, #define pmd_leaf_size(pmd) (pmd_cont(pmd) ? CONT_PMD_SIZE : PMD_SIZE) #define pte_leaf_size(pte) (pte_cont(pte) ? 
CONT_PTE_SIZE : PAGE_SIZE) +#ifdef CONFIG_TRANSPARENT_HUGEPAGE +static inline int pmd_trans_huge(pmd_t pmd) +{ + /* + * If pmd is present-invalid, pmd_table() won't detect it + * as a table, so force the valid bit for the comparison. + */ + return pmd_val(pmd) && pmd_present(pmd) && + !pmd_table(__pmd(pmd_val(pmd) | PTE_VALID)); +} +#endif /* CONFIG_TRANSPARENT_HUGEPAGE */ + #if defined(CONFIG_ARM64_64K_PAGES) || CONFIG_PGTABLE_LEVELS < 3 static inline bool pud_sect(pud_t pud) { return false; } static inline bool pud_table(pud_t pud) { return true; } @@ -805,7 +830,8 @@ static inline unsigned long pmd_page_vaddr(pmd_t pmd) pr_err("%s:%d: bad pmd %016llx.\n", __FILE__, __LINE__, pmd_val(e)) #define pud_none(pud) (!pud_val(pud)) -#define pud_bad(pud) (!pud_table(pud)) +#define pud_bad(pud) ((pud_val(pud) & PUD_TYPE_MASK) != \ + PUD_TYPE_TABLE) #define pud_present(pud) pte_present(pud_pte(pud)) #ifndef __PAGETABLE_PMD_FOLDED #define pud_leaf(pud) (pud_present(pud) && !pud_table(pud)) @@ -896,7 +922,9 @@ static inline bool mm_pud_folded(const struct mm_struct *mm) pr_err("%s:%d: bad pud %016llx.\n", __FILE__, __LINE__, pud_val(e)) #define p4d_none(p4d) (pgtable_l4_enabled() && !p4d_val(p4d)) -#define p4d_bad(p4d) (pgtable_l4_enabled() && !(p4d_val(p4d) & P4D_TABLE_BIT)) +#define p4d_bad(p4d) (pgtable_l4_enabled() && \ + ((p4d_val(p4d) & P4D_TYPE_MASK) != \ + P4D_TYPE_TABLE)) #define p4d_present(p4d) (!p4d_none(p4d)) static inline void set_p4d(p4d_t *p4dp, p4d_t p4d) @@ -1023,7 +1051,9 @@ static inline bool mm_p4d_folded(const struct mm_struct *mm) pr_err("%s:%d: bad p4d %016llx.\n", __FILE__, __LINE__, p4d_val(e)) #define pgd_none(pgd) (pgtable_l5_enabled() && !pgd_val(pgd)) -#define pgd_bad(pgd) (pgtable_l5_enabled() && !(pgd_val(pgd) & PGD_TABLE_BIT)) +#define pgd_bad(pgd) (pgtable_l5_enabled() && \ + ((pgd_val(pgd) & PGD_TYPE_MASK) != \ + PGD_TYPE_TABLE)) #define pgd_present(pgd) (!pgd_none(pgd)) static inline void set_pgd(pgd_t *pgdp, pgd_t pgd) diff --git 
a/arch/arm64/include/asm/por.h b/arch/arm64/include/asm/por.h index e06e9f473675f..d913d5b529e49 100644 --- a/arch/arm64/include/asm/por.h +++ b/arch/arm64/include/asm/por.h @@ -6,26 +6,27 @@ #ifndef _ASM_ARM64_POR_H #define _ASM_ARM64_POR_H -#define POR_BITS_PER_PKEY 4 -#define POR_ELx_IDX(por_elx, idx) (((por_elx) >> ((idx) * POR_BITS_PER_PKEY)) & 0xf) +#include + +#define POR_EL0_INIT POR_ELx_PERM_PREP(0, POE_RWX) static inline bool por_elx_allows_read(u64 por, u8 pkey) { - u8 perm = POR_ELx_IDX(por, pkey); + u8 perm = POR_ELx_PERM_GET(pkey, por); return perm & POE_R; } static inline bool por_elx_allows_write(u64 por, u8 pkey) { - u8 perm = POR_ELx_IDX(por, pkey); + u8 perm = POR_ELx_PERM_GET(pkey, por); return perm & POE_W; } static inline bool por_elx_allows_exec(u64 por, u8 pkey) { - u8 perm = POR_ELx_IDX(por, pkey); + u8 perm = POR_ELx_PERM_GET(pkey, por); return perm & POE_X; } diff --git a/arch/arm64/include/asm/spectre.h b/arch/arm64/include/asm/spectre.h index 0c4d9045c31f4..f1524cdeacf1c 100644 --- a/arch/arm64/include/asm/spectre.h +++ b/arch/arm64/include/asm/spectre.h @@ -97,7 +97,6 @@ enum mitigation_state arm64_get_meltdown_state(void); enum mitigation_state arm64_get_spectre_bhb_state(void); bool is_spectre_bhb_affected(const struct arm64_cpu_capabilities *entry, int scope); -u8 spectre_bhb_loop_affected(int scope); void spectre_bhb_enable_mitigation(const struct arm64_cpu_capabilities *__unused); bool try_emulate_el1_ssbs(struct pt_regs *regs, u32 instr); diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h index 05ea5223d2d55..e3252f8bb465e 100644 --- a/arch/arm64/include/asm/sysreg.h +++ b/arch/arm64/include/asm/sysreg.h @@ -1062,8 +1062,11 @@ #define PIE_RX UL(0xa) #define PIE_RW UL(0xc) #define PIE_RWX UL(0xe) +#define PIE_MASK UL(0xf) -#define PIRx_ELx_PERM(idx, perm) ((perm) << ((idx) * 4)) +#define PIRx_ELx_BITS_PER_IDX 4 +#define PIRx_ELx_PERM_SHIFT(idx) ((idx) * PIRx_ELx_BITS_PER_IDX) +#define 
PIRx_ELx_PERM_PREP(idx, perm) (((perm) & PIE_MASK) << PIRx_ELx_PERM_SHIFT(idx)) /* * Permission Overlay Extension (POE) permission encodings. @@ -1074,12 +1077,14 @@ #define POE_RX UL(0x3) #define POE_W UL(0x4) #define POE_RW UL(0x5) -#define POE_XW UL(0x6) -#define POE_RXW UL(0x7) +#define POE_WX UL(0x6) +#define POE_RWX UL(0x7) #define POE_MASK UL(0xf) -/* Initial value for Permission Overlay Extension for EL0 */ -#define POR_EL0_INIT POE_RXW +#define POR_ELx_BITS_PER_IDX 4 +#define POR_ELx_PERM_SHIFT(idx) ((idx) * POR_ELx_BITS_PER_IDX) +#define POR_ELx_PERM_GET(idx, reg) (((reg) >> POR_ELx_PERM_SHIFT(idx)) & POE_MASK) +#define POR_ELx_PERM_PREP(idx, perm) (((perm) & POE_MASK) << POR_ELx_PERM_SHIFT(idx)) /* * Definitions for Guarded Control Stack diff --git a/arch/arm64/kernel/pi/map_range.c b/arch/arm64/kernel/pi/map_range.c index 2b69e3beeef80..81345f68f9fc0 100644 --- a/arch/arm64/kernel/pi/map_range.c +++ b/arch/arm64/kernel/pi/map_range.c @@ -31,7 +31,7 @@ void __init map_range(u64 *pte, u64 start, u64 end, u64 pa, pgprot_t prot, { u64 cmask = (level == 3) ? CONT_PTE_SIZE - 1 : U64_MAX; pteval_t protval = pgprot_val(prot) & ~PTE_TYPE_MASK; - int lshift = (3 - level) * (PAGE_SHIFT - 3); + int lshift = (3 - level) * PTDESC_TABLE_SHIFT; u64 lmask = (PAGE_SIZE << lshift) - 1; start &= PAGE_MASK; @@ -45,12 +45,12 @@ void __init map_range(u64 *pte, u64 start, u64 end, u64 pa, pgprot_t prot, * clearing the mapping */ if (protval) - protval |= (level < 3) ? PMD_TYPE_SECT : PTE_TYPE_PAGE; + protval |= (level == 2) ? PMD_TYPE_SECT : PTE_TYPE_PAGE; while (start < end) { u64 next = min((start | lmask) + 1, PAGE_ALIGN(end)); - if (level < 3 && (start | next | pa) & lmask) { + if (level < 2 || (level == 2 && (start | next | pa) & lmask)) { /* * This chunk needs a finer grained mapping. Create a * table mapping if necessary and recurse. 
diff --git a/arch/arm64/kernel/proton-pack.c b/arch/arm64/kernel/proton-pack.c index da53722f95d41..0f51fd10b4b06 100644 --- a/arch/arm64/kernel/proton-pack.c +++ b/arch/arm64/kernel/proton-pack.c @@ -845,52 +845,88 @@ static unsigned long system_bhb_mitigations; * This must be called with SCOPE_LOCAL_CPU for each type of CPU, before any * SCOPE_SYSTEM call will give the right answer. */ -u8 spectre_bhb_loop_affected(int scope) +static bool is_spectre_bhb_safe(int scope) +{ + static const struct midr_range spectre_bhb_safe_list[] = { + MIDR_ALL_VERSIONS(MIDR_CORTEX_A35), + MIDR_ALL_VERSIONS(MIDR_CORTEX_A53), + MIDR_ALL_VERSIONS(MIDR_CORTEX_A55), + MIDR_ALL_VERSIONS(MIDR_CORTEX_A510), + MIDR_ALL_VERSIONS(MIDR_CORTEX_A520), + MIDR_ALL_VERSIONS(MIDR_BRAHMA_B53), + MIDR_ALL_VERSIONS(MIDR_QCOM_KRYO_2XX_SILVER), + MIDR_ALL_VERSIONS(MIDR_QCOM_KRYO_3XX_SILVER), + MIDR_ALL_VERSIONS(MIDR_QCOM_KRYO_4XX_SILVER), + {}, + }; + static bool all_safe = true; + + if (scope != SCOPE_LOCAL_CPU) + return all_safe; + + if (is_midr_in_range_list(read_cpuid_id(), spectre_bhb_safe_list)) + return true; + + all_safe = false; + + return false; +} + +static u8 spectre_bhb_loop_affected(void) { u8 k = 0; - static u8 max_bhb_k; - - if (scope == SCOPE_LOCAL_CPU) { - static const struct midr_range spectre_bhb_k32_list[] = { - MIDR_ALL_VERSIONS(MIDR_CORTEX_A78), - MIDR_ALL_VERSIONS(MIDR_CORTEX_A78AE), - MIDR_ALL_VERSIONS(MIDR_CORTEX_A78C), - MIDR_ALL_VERSIONS(MIDR_CORTEX_X1), - MIDR_ALL_VERSIONS(MIDR_CORTEX_A710), - MIDR_ALL_VERSIONS(MIDR_CORTEX_X2), - MIDR_ALL_VERSIONS(MIDR_NEOVERSE_N2), - MIDR_ALL_VERSIONS(MIDR_NEOVERSE_V1), - {}, - }; - static const struct midr_range spectre_bhb_k24_list[] = { - MIDR_ALL_VERSIONS(MIDR_CORTEX_A76), - MIDR_ALL_VERSIONS(MIDR_CORTEX_A77), - MIDR_ALL_VERSIONS(MIDR_NEOVERSE_N1), - {}, - }; - static const struct midr_range spectre_bhb_k11_list[] = { - MIDR_ALL_VERSIONS(MIDR_AMPERE1), - {}, - }; - static const struct midr_range spectre_bhb_k8_list[] = { - 
MIDR_ALL_VERSIONS(MIDR_CORTEX_A72), - MIDR_ALL_VERSIONS(MIDR_CORTEX_A57), - {}, - }; - - if (is_midr_in_range_list(read_cpuid_id(), spectre_bhb_k32_list)) - k = 32; - else if (is_midr_in_range_list(read_cpuid_id(), spectre_bhb_k24_list)) - k = 24; - else if (is_midr_in_range_list(read_cpuid_id(), spectre_bhb_k11_list)) - k = 11; - else if (is_midr_in_range_list(read_cpuid_id(), spectre_bhb_k8_list)) - k = 8; - - max_bhb_k = max(max_bhb_k, k); - } else { - k = max_bhb_k; - } + + static const struct midr_range spectre_bhb_k132_list[] = { + MIDR_ALL_VERSIONS(MIDR_CORTEX_X3), + MIDR_ALL_VERSIONS(MIDR_NEOVERSE_V2), + {}, + }; + static const struct midr_range spectre_bhb_k38_list[] = { + MIDR_ALL_VERSIONS(MIDR_CORTEX_A715), + MIDR_ALL_VERSIONS(MIDR_CORTEX_A720), + {}, + }; + static const struct midr_range spectre_bhb_k32_list[] = { + MIDR_ALL_VERSIONS(MIDR_CORTEX_A78), + MIDR_ALL_VERSIONS(MIDR_CORTEX_A78AE), + MIDR_ALL_VERSIONS(MIDR_CORTEX_A78C), + MIDR_ALL_VERSIONS(MIDR_CORTEX_X1), + MIDR_ALL_VERSIONS(MIDR_CORTEX_A710), + MIDR_ALL_VERSIONS(MIDR_CORTEX_X2), + MIDR_ALL_VERSIONS(MIDR_NEOVERSE_N2), + MIDR_ALL_VERSIONS(MIDR_NEOVERSE_V1), + {}, + }; + static const struct midr_range spectre_bhb_k24_list[] = { + MIDR_ALL_VERSIONS(MIDR_CORTEX_A76), + MIDR_ALL_VERSIONS(MIDR_CORTEX_A76AE), + MIDR_ALL_VERSIONS(MIDR_CORTEX_A77), + MIDR_ALL_VERSIONS(MIDR_NEOVERSE_N1), + MIDR_ALL_VERSIONS(MIDR_QCOM_KRYO_4XX_GOLD), + {}, + }; + static const struct midr_range spectre_bhb_k11_list[] = { + MIDR_ALL_VERSIONS(MIDR_AMPERE1), + {}, + }; + static const struct midr_range spectre_bhb_k8_list[] = { + MIDR_ALL_VERSIONS(MIDR_CORTEX_A72), + MIDR_ALL_VERSIONS(MIDR_CORTEX_A57), + {}, + }; + + if (is_midr_in_range_list(read_cpuid_id(), spectre_bhb_k132_list)) + k = 132; + else if (is_midr_in_range_list(read_cpuid_id(), spectre_bhb_k38_list)) + k = 38; + else if (is_midr_in_range_list(read_cpuid_id(), spectre_bhb_k32_list)) + k = 32; + else if (is_midr_in_range_list(read_cpuid_id(), spectre_bhb_k24_list)) + k = 24; 
+ else if (is_midr_in_range_list(read_cpuid_id(), spectre_bhb_k11_list)) + k = 11; + else if (is_midr_in_range_list(read_cpuid_id(), spectre_bhb_k8_list)) + k = 8; return k; } @@ -916,29 +950,13 @@ static enum mitigation_state spectre_bhb_get_cpu_fw_mitigation_state(void) } } -static bool is_spectre_bhb_fw_affected(int scope) +static bool has_spectre_bhb_fw_mitigation(void) { - static bool system_affected; enum mitigation_state fw_state; bool has_smccc = arm_smccc_1_1_get_conduit() != SMCCC_CONDUIT_NONE; - static const struct midr_range spectre_bhb_firmware_mitigated_list[] = { - MIDR_ALL_VERSIONS(MIDR_CORTEX_A73), - MIDR_ALL_VERSIONS(MIDR_CORTEX_A75), - {}, - }; - bool cpu_in_list = is_midr_in_range_list(read_cpuid_id(), - spectre_bhb_firmware_mitigated_list); - - if (scope != SCOPE_LOCAL_CPU) - return system_affected; fw_state = spectre_bhb_get_cpu_fw_mitigation_state(); - if (cpu_in_list || (has_smccc && fw_state == SPECTRE_MITIGATED)) { - system_affected = true; - return true; - } - - return false; + return has_smccc && fw_state == SPECTRE_MITIGATED; } static bool supports_ecbhb(int scope) @@ -954,6 +972,8 @@ static bool supports_ecbhb(int scope) ID_AA64MMFR1_EL1_ECBHB_SHIFT); } +static u8 max_bhb_k; + bool is_spectre_bhb_affected(const struct arm64_cpu_capabilities *entry, int scope) { @@ -962,16 +982,18 @@ bool is_spectre_bhb_affected(const struct arm64_cpu_capabilities *entry, if (supports_csv2p3(scope)) return false; - if (supports_clearbhb(scope)) - return true; - - if (spectre_bhb_loop_affected(scope)) - return true; + if (is_spectre_bhb_safe(scope)) + return false; - if (is_spectre_bhb_fw_affected(scope)) - return true; + /* + * At this point the core isn't known to be "safe" so we're going to + * assume it's vulnerable. We still need to update `max_bhb_k` though, + * but only if we aren't mitigating with clearbhb. 
+ */ + if (scope == SCOPE_LOCAL_CPU && !supports_clearbhb(SCOPE_LOCAL_CPU)) + max_bhb_k = max(max_bhb_k, spectre_bhb_loop_affected()); - return false; + return true; } static void this_cpu_set_vectors(enum arm64_bp_harden_el1_vectors slot) @@ -1002,7 +1024,7 @@ early_param("nospectre_bhb", parse_spectre_bhb_param); void spectre_bhb_enable_mitigation(const struct arm64_cpu_capabilities *entry) { bp_hardening_cb_t cpu_cb; - enum mitigation_state fw_state, state = SPECTRE_VULNERABLE; + enum mitigation_state state = SPECTRE_VULNERABLE; struct bp_hardening_data *data = this_cpu_ptr(&bp_hardening_data); if (!is_spectre_bhb_affected(entry, SCOPE_LOCAL_CPU)) @@ -1028,7 +1050,7 @@ void spectre_bhb_enable_mitigation(const struct arm64_cpu_capabilities *entry) this_cpu_set_vectors(EL1_VECTOR_BHB_CLEAR_INSN); state = SPECTRE_MITIGATED; set_bit(BHB_INSN, &system_bhb_mitigations); - } else if (spectre_bhb_loop_affected(SCOPE_LOCAL_CPU)) { + } else if (spectre_bhb_loop_affected()) { /* * Ensure KVM uses the indirect vector which will have the * branchy-loop added. A57/A72-r0 will already have selected @@ -1041,32 +1063,29 @@ void spectre_bhb_enable_mitigation(const struct arm64_cpu_capabilities *entry) this_cpu_set_vectors(EL1_VECTOR_BHB_LOOP); state = SPECTRE_MITIGATED; set_bit(BHB_LOOP, &system_bhb_mitigations); - } else if (is_spectre_bhb_fw_affected(SCOPE_LOCAL_CPU)) { - fw_state = spectre_bhb_get_cpu_fw_mitigation_state(); - if (fw_state == SPECTRE_MITIGATED) { - /* - * Ensure KVM uses one of the spectre bp_hardening - * vectors. The indirect vector doesn't include the EL3 - * call, so needs upgrading to - * HYP_VECTOR_SPECTRE_INDIRECT. - */ - if (!data->slot || data->slot == HYP_VECTOR_INDIRECT) - data->slot += 1; - - this_cpu_set_vectors(EL1_VECTOR_BHB_FW); - - /* - * The WA3 call in the vectors supersedes the WA1 call - * made during context-switch. Uninstall any firmware - * bp_hardening callback. 
- */ - cpu_cb = spectre_v2_get_sw_mitigation_cb(); - if (__this_cpu_read(bp_hardening_data.fn) != cpu_cb) - __this_cpu_write(bp_hardening_data.fn, NULL); - - state = SPECTRE_MITIGATED; - set_bit(BHB_FW, &system_bhb_mitigations); - } + } else if (has_spectre_bhb_fw_mitigation()) { + /* + * Ensure KVM uses one of the spectre bp_hardening + * vectors. The indirect vector doesn't include the EL3 + * call, so needs upgrading to + * HYP_VECTOR_SPECTRE_INDIRECT. + */ + if (!data->slot || data->slot == HYP_VECTOR_INDIRECT) + data->slot += 1; + + this_cpu_set_vectors(EL1_VECTOR_BHB_FW); + + /* + * The WA3 call in the vectors supersedes the WA1 call + * made during context-switch. Uninstall any firmware + * bp_hardening callback. + */ + cpu_cb = spectre_v2_get_sw_mitigation_cb(); + if (__this_cpu_read(bp_hardening_data.fn) != cpu_cb) + __this_cpu_write(bp_hardening_data.fn, NULL); + + state = SPECTRE_MITIGATED; + set_bit(BHB_FW, &system_bhb_mitigations); } update_mitigation_state(&spectre_bhb_state, state); @@ -1100,7 +1119,6 @@ void noinstr spectre_bhb_patch_loop_iter(struct alt_instr *alt, { u8 rd; u32 insn; - u16 loop_count = spectre_bhb_loop_affected(SCOPE_SYSTEM); BUG_ON(nr_inst != 1); /* MOV -> MOV */ @@ -1109,7 +1127,7 @@ void noinstr spectre_bhb_patch_loop_iter(struct alt_instr *alt, insn = le32_to_cpu(*origptr); rd = aarch64_insn_decode_register(AARCH64_INSN_REGTYPE_RD, insn); - insn = aarch64_insn_gen_movewide(rd, loop_count, 0, + insn = aarch64_insn_gen_movewide(rd, max_bhb_k, 0, AARCH64_INSN_VARIANT_64BIT, AARCH64_INSN_MOVEWIDE_ZERO); *updptr++ = cpu_to_le32(insn); diff --git a/arch/arm64/kernel/signal.c b/arch/arm64/kernel/signal.c index 99ea26d400ffe..a7c37afb4ebeb 100644 --- a/arch/arm64/kernel/signal.c +++ b/arch/arm64/kernel/signal.c @@ -91,7 +91,7 @@ static void save_reset_user_access_state(struct user_access_state *ua_state) u64 por_enable_all = 0; for (int pkey = 0; pkey < arch_max_pkey(); pkey++) - por_enable_all |= POE_RXW << (pkey * POR_BITS_PER_PKEY); 
+ por_enable_all |= POR_ELx_PERM_PREP(pkey, POE_RWX); ua_state->por_el0 = read_sysreg_s(SYS_POR_EL0); write_sysreg_s(por_enable_all, SYS_POR_EL0); diff --git a/arch/arm64/kernel/topology.c b/arch/arm64/kernel/topology.c index cb180684d10d5..9e3583720668a 100644 --- a/arch/arm64/kernel/topology.c +++ b/arch/arm64/kernel/topology.c @@ -17,6 +17,7 @@ #include #include #include +#include #include #include @@ -88,18 +89,28 @@ int __init parse_acpi_topology(void) * initialized. */ static DEFINE_PER_CPU_READ_MOSTLY(unsigned long, arch_max_freq_scale) = 1UL << (2 * SCHED_CAPACITY_SHIFT); -static DEFINE_PER_CPU(u64, arch_const_cycles_prev); -static DEFINE_PER_CPU(u64, arch_core_cycles_prev); static cpumask_var_t amu_fie_cpus; +struct amu_cntr_sample { + u64 arch_const_cycles_prev; + u64 arch_core_cycles_prev; + unsigned long last_scale_update; +}; + +static DEFINE_PER_CPU_SHARED_ALIGNED(struct amu_cntr_sample, cpu_amu_samples); + void update_freq_counters_refs(void) { - this_cpu_write(arch_core_cycles_prev, read_corecnt()); - this_cpu_write(arch_const_cycles_prev, read_constcnt()); + struct amu_cntr_sample *amu_sample = this_cpu_ptr(&cpu_amu_samples); + + amu_sample->arch_core_cycles_prev = read_corecnt(); + amu_sample->arch_const_cycles_prev = read_constcnt(); } static inline bool freq_counters_valid(int cpu) { + struct amu_cntr_sample *amu_sample = per_cpu_ptr(&cpu_amu_samples, cpu); + if ((cpu >= nr_cpu_ids) || !cpumask_test_cpu(cpu, cpu_present_mask)) return false; @@ -108,8 +119,8 @@ static inline bool freq_counters_valid(int cpu) return false; } - if (unlikely(!per_cpu(arch_const_cycles_prev, cpu) || - !per_cpu(arch_core_cycles_prev, cpu))) { + if (unlikely(!amu_sample->arch_const_cycles_prev || + !amu_sample->arch_core_cycles_prev)) { pr_debug("CPU%d: cycle counters are not enabled.\n", cpu); return false; } @@ -152,17 +163,22 @@ void freq_inv_set_max_ratio(int cpu, u64 max_rate) static void amu_scale_freq_tick(void) { + struct amu_cntr_sample *amu_sample = 
this_cpu_ptr(&cpu_amu_samples); u64 prev_core_cnt, prev_const_cnt; u64 core_cnt, const_cnt, scale; - prev_const_cnt = this_cpu_read(arch_const_cycles_prev); - prev_core_cnt = this_cpu_read(arch_core_cycles_prev); + prev_const_cnt = amu_sample->arch_const_cycles_prev; + prev_core_cnt = amu_sample->arch_core_cycles_prev; update_freq_counters_refs(); - const_cnt = this_cpu_read(arch_const_cycles_prev); - core_cnt = this_cpu_read(arch_core_cycles_prev); + const_cnt = amu_sample->arch_const_cycles_prev; + core_cnt = amu_sample->arch_core_cycles_prev; + /* + * This should not happen unless the AMUs have been reset and the + * counter values have not been restored - unlikely + */ if (unlikely(core_cnt <= prev_core_cnt || const_cnt <= prev_const_cnt)) return; @@ -182,6 +198,8 @@ static void amu_scale_freq_tick(void) scale = min_t(unsigned long, scale, SCHED_CAPACITY_SCALE); this_cpu_write(arch_freq_scale, (unsigned long)scale); + + amu_sample->last_scale_update = jiffies; } static struct scale_freq_data amu_sfd = { @@ -189,6 +207,96 @@ static struct scale_freq_data amu_sfd = { .set_freq_scale = amu_scale_freq_tick, }; +static __always_inline bool amu_fie_cpu_supported(unsigned int cpu) +{ + return cpumask_available(amu_fie_cpus) && + cpumask_test_cpu(cpu, amu_fie_cpus); +} + +void arch_cpu_idle_enter(void) +{ + unsigned int cpu = smp_processor_id(); + + if (!amu_fie_cpu_supported(cpu)) + return; + + /* Kick in AMU update but only if one has not happened already */ + if (housekeeping_cpu(cpu, HK_TYPE_TICK) && + time_is_before_jiffies(per_cpu(cpu_amu_samples.last_scale_update, cpu))) + amu_scale_freq_tick(); +} + +#define AMU_SAMPLE_EXP_MS 20 + +int arch_freq_get_on_cpu(int cpu) +{ + struct amu_cntr_sample *amu_sample; + unsigned int start_cpu = cpu; + unsigned long last_update; + unsigned int freq = 0; + u64 scale; + + if (!amu_fie_cpu_supported(cpu) || !arch_scale_freq_ref(cpu)) + return -EOPNOTSUPP; + + while (1) { + + amu_sample = per_cpu_ptr(&cpu_amu_samples, cpu); + + 
last_update = amu_sample->last_scale_update; + + /* + * For those CPUs that are in full dynticks mode, or those that have + * not seen tick for a while, try an alternative source for the counters + * (and thus freq scale), if available, for given policy: this boils + * down to identifying an active cpu within the same freq domain, if any. + */ + if (!housekeeping_cpu(cpu, HK_TYPE_TICK) || + time_is_before_jiffies(last_update + msecs_to_jiffies(AMU_SAMPLE_EXP_MS))) { + struct cpufreq_policy *policy = cpufreq_cpu_get(cpu); + int ref_cpu; + + if (!policy) + return -EINVAL; + + if (!cpumask_intersects(policy->related_cpus, + housekeeping_cpumask(HK_TYPE_TICK))) { + cpufreq_cpu_put(policy); + return -EOPNOTSUPP; + } + + for_each_cpu_wrap(ref_cpu, policy->cpus, cpu + 1) { + if (ref_cpu == start_cpu) { + /* Prevent verifying same CPU twice */ + ref_cpu = nr_cpu_ids; + break; + } + if (!idle_cpu(ref_cpu)) + break; + } + + cpufreq_cpu_put(policy); + + if (ref_cpu >= nr_cpu_ids) + /* No alternative to pull info from */ + return -EAGAIN; + + cpu = ref_cpu; + } else { + break; + } + } + /* + * Reversed computation to the one used to determine + * the arch_freq_scale value + * (see amu_scale_freq_tick for details) + */ + scale = arch_scale_freq_capacity(cpu); + freq = scale * arch_scale_freq_ref(cpu); + freq >>= SCHED_CAPACITY_SHIFT; + return freq; +} + static void amu_fie_setup(const struct cpumask *cpus) { int cpu; diff --git a/arch/arm64/kvm/at.c b/arch/arm64/kvm/at.c index 3a96c96816e93..f74a66ce3064b 100644 --- a/arch/arm64/kvm/at.c +++ b/arch/arm64/kvm/at.c @@ -1090,22 +1090,22 @@ static void compute_s1_overlay_permissions(struct kvm_vcpu *vcpu, break; } - if (pov_perms & ~POE_RXW) + if (pov_perms & ~POE_RWX) pov_perms = POE_NONE; if (wi->poe && wr->pov) { wr->pr &= pov_perms & POE_R; - wr->px &= pov_perms & POE_X; wr->pw &= pov_perms & POE_W; + wr->px &= pov_perms & POE_X; } - if (uov_perms & ~POE_RXW) + if (uov_perms & ~POE_RWX) uov_perms = POE_NONE; if (wi->e0poe && 
wr->uov) { wr->ur &= uov_perms & POE_R; - wr->ux &= uov_perms & POE_X; wr->uw &= uov_perms & POE_W; + wr->ux &= uov_perms & POE_X; } } diff --git a/arch/arm64/kvm/ptdump.c b/arch/arm64/kvm/ptdump.c index e4a342e903e25..098416d7e5c25 100644 --- a/arch/arm64/kvm/ptdump.c +++ b/arch/arm64/kvm/ptdump.c @@ -52,8 +52,8 @@ static const struct ptdump_prot_bits stage2_pte_bits[] = { .set = "AF", .clear = " ", }, { - .mask = PTE_TABLE_BIT | PTE_VALID, - .val = PTE_VALID, + .mask = PMD_TYPE_MASK, + .val = PMD_TYPE_SECT, .set = "BLK", .clear = " ", }, diff --git a/arch/arm64/lib/clear_user.S b/arch/arm64/lib/clear_user.S index a5a5f5b97b175..de9a303b6ad0e 100644 --- a/arch/arm64/lib/clear_user.S +++ b/arch/arm64/lib/clear_user.S @@ -17,14 +17,27 @@ * Alignment fixed up by hardware. */ - .p2align 4 - // Alignment is for the loop, but since the prologue (including BTI) - // is also 16 bytes we can keep any padding outside the function SYM_FUNC_START(__arch_clear_user) add x2, x0, x1 + +#ifdef CONFIG_AS_HAS_MOPS + .arch_extension mops +alternative_if_not ARM64_HAS_MOPS + b .Lno_mops +alternative_else_nop_endif + +USER(9f, setpt [x0]!, x1!, xzr) +USER(6f, setmt [x0]!, x1!, xzr) +USER(6f, setet [x0]!, x1!, xzr) + mov x0, #0 + ret +.Lno_mops: +#endif + subs x1, x1, #8 b.mi 2f -1: + +1: .p2align 4 USER(9f, sttr xzr, [x0]) add x0, x0, #8 subs x1, x1, #8 @@ -47,6 +60,10 @@ USER(7f, sttrb wzr, [x2, #-1]) ret // Exception fixups +6: b.cs 9f + // Registers are in Option A format + add x0, x0, x1 + b 9f 7: sub x0, x2, #5 // Adjust for faulting on the final byte... 
8: add x0, x0, #4 // ...or the second word of the 4-7 byte case 9: sub x0, x2, x0 diff --git a/arch/arm64/lib/copy_from_user.S b/arch/arm64/lib/copy_from_user.S index 34e3179075244..400057d607ecd 100644 --- a/arch/arm64/lib/copy_from_user.S +++ b/arch/arm64/lib/copy_from_user.S @@ -52,6 +52,13 @@ stp \reg1, \reg2, [\ptr], \val .endm + .macro cpy1 dst, src, count + .arch_extension mops + USER_CPY(9997f, 0, cpyfprt [\dst]!, [\src]!, \count!) + USER_CPY(9996f, 0, cpyfmrt [\dst]!, [\src]!, \count!) + USER_CPY(9996f, 0, cpyfert [\dst]!, [\src]!, \count!) + .endm + end .req x5 srcin .req x15 SYM_FUNC_START(__arch_copy_from_user) @@ -62,6 +69,9 @@ SYM_FUNC_START(__arch_copy_from_user) ret // Exception fixups +9996: b.cs 9997f + // Registers are in Option A format + add dst, dst, count 9997: cmp dst, dstin b.ne 9998f // Before being absolutely sure we couldn't copy anything, try harder diff --git a/arch/arm64/lib/copy_template.S b/arch/arm64/lib/copy_template.S index 488df234c49a2..7f2f5a0e2fb9f 100644 --- a/arch/arm64/lib/copy_template.S +++ b/arch/arm64/lib/copy_template.S @@ -40,6 +40,16 @@ D_l .req x13 D_h .req x14 mov dst, dstin + +#ifdef CONFIG_AS_HAS_MOPS +alternative_if_not ARM64_HAS_MOPS + b .Lno_mops +alternative_else_nop_endif + cpy1 dst, src, count + b .Lexitfunc +.Lno_mops: +#endif + cmp count, #16 /*When memory length is less than 16, the accessed are not aligned.*/ b.lo .Ltiny15 diff --git a/arch/arm64/lib/copy_to_user.S b/arch/arm64/lib/copy_to_user.S index 8022317726085..819f2e3fc7a93 100644 --- a/arch/arm64/lib/copy_to_user.S +++ b/arch/arm64/lib/copy_to_user.S @@ -51,6 +51,13 @@ user_stp 9997f, \reg1, \reg2, \ptr, \val .endm + .macro cpy1 dst, src, count + .arch_extension mops + USER_CPY(9997f, 1, cpyfpwt [\dst]!, [\src]!, \count!) + USER_CPY(9996f, 1, cpyfmwt [\dst]!, [\src]!, \count!) + USER_CPY(9996f, 1, cpyfewt [\dst]!, [\src]!, \count!) 
+ .endm + end .req x5 srcin .req x15 SYM_FUNC_START(__arch_copy_to_user) @@ -61,6 +68,9 @@ SYM_FUNC_START(__arch_copy_to_user) ret // Exception fixups +9996: b.cs 9997f + // Registers are in Option A format + add dst, dst, count 9997: cmp dst, dstin b.ne 9998f // Before being absolutely sure we couldn't copy anything, try harder diff --git a/arch/arm64/mm/extable.c b/arch/arm64/mm/extable.c index 228d681a87159..6e0528831cd3a 100644 --- a/arch/arm64/mm/extable.c +++ b/arch/arm64/mm/extable.c @@ -8,8 +8,33 @@ #include #include +#include #include +static bool cpy_faulted_on_uaccess(const struct exception_table_entry *ex, + unsigned long esr) +{ + bool uaccess_is_write = FIELD_GET(EX_DATA_UACCESS_WRITE, ex->data); + bool fault_on_write = esr & ESR_ELx_WNR; + + return uaccess_is_write == fault_on_write; +} + +bool insn_may_access_user(unsigned long addr, unsigned long esr) +{ + const struct exception_table_entry *ex = search_exception_tables(addr); + + if (!ex) + return false; + + switch (ex->type) { + case EX_TYPE_UACCESS_CPY: + return cpy_faulted_on_uaccess(ex, esr); + default: + return true; + } +} + static inline unsigned long get_ex_fixup(const struct exception_table_entry *ex) { @@ -29,6 +54,17 @@ static bool ex_handler_uaccess_err_zero(const struct exception_table_entry *ex, return true; } +static bool ex_handler_uaccess_cpy(const struct exception_table_entry *ex, + struct pt_regs *regs, unsigned long esr) +{ + /* Do not fix up faults on kernel memory accesses */ + if (!cpy_faulted_on_uaccess(ex, esr)) + return false; + + regs->pc = get_ex_fixup(ex); + return true; +} + static bool ex_handler_load_unaligned_zeropad(const struct exception_table_entry *ex, struct pt_regs *regs) @@ -56,7 +92,7 @@ ex_handler_load_unaligned_zeropad(const struct exception_table_entry *ex, return true; } -bool fixup_exception(struct pt_regs *regs) +bool fixup_exception(struct pt_regs *regs, unsigned long esr) { const struct exception_table_entry *ex; @@ -70,6 +106,8 @@ bool 
fixup_exception(struct pt_regs *regs) case EX_TYPE_UACCESS_ERR_ZERO: case EX_TYPE_KACCESS_ERR_ZERO: return ex_handler_uaccess_err_zero(ex, regs); + case EX_TYPE_UACCESS_CPY: + return ex_handler_uaccess_cpy(ex, regs, esr); case EX_TYPE_LOAD_UNALIGNED_ZEROPAD: return ex_handler_load_unaligned_zeropad(ex, regs); } diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c index ef63651099a9d..ec0a337891ddf 100644 --- a/arch/arm64/mm/fault.c +++ b/arch/arm64/mm/fault.c @@ -375,7 +375,7 @@ static void __do_kernel_fault(unsigned long addr, unsigned long esr, * Are we prepared to handle this kernel fault? * We are almost certainly not prepared to handle instruction faults. */ - if (!is_el1_instruction_abort(esr) && fixup_exception(regs)) + if (!is_el1_instruction_abort(esr) && fixup_exception(regs, esr)) return; if (WARN_RATELIMIT(is_spurious_el1_translation_fault(addr, esr, regs), @@ -606,7 +606,7 @@ static int __kprobes do_page_fault(unsigned long far, unsigned long esr, die_kernel_fault("execution of user memory", addr, esr, regs); - if (!search_exception_tables(regs->pc)) + if (!insn_may_access_user(regs->pc, esr)) die_kernel_fault("access to user memory outside uaccess routines", addr, esr, regs); } diff --git a/arch/arm64/mm/hugetlbpage.c b/arch/arm64/mm/hugetlbpage.c index 98a2a0e64e255..fd7448bb8c806 100644 --- a/arch/arm64/mm/hugetlbpage.c +++ b/arch/arm64/mm/hugetlbpage.c @@ -342,7 +342,9 @@ unsigned long hugetlb_mask_last_page(struct hstate *h) switch (hp_size) { #ifndef __PAGETABLE_PMD_FOLDED case PUD_SIZE: - return PGDIR_SIZE - PUD_SIZE; + if (pud_sect_supported()) + return PGDIR_SIZE - PUD_SIZE; + break; #endif case CONT_PMD_SIZE: return PUD_SIZE - CONT_PMD_SIZE; @@ -364,23 +366,21 @@ pte_t arch_make_huge_pte(pte_t entry, unsigned int shift, vm_flags_t flags) switch (pagesize) { #ifndef __PAGETABLE_PMD_FOLDED case PUD_SIZE: - entry = pud_pte(pud_mkhuge(pte_pud(entry))); + if (pud_sect_supported()) + return pud_pte(pud_mkhuge(pte_pud(entry))); break; #endif 
case CONT_PMD_SIZE: - entry = pmd_pte(pmd_mkcont(pte_pmd(entry))); - fallthrough; + return pmd_pte(pmd_mkhuge(pmd_mkcont(pte_pmd(entry)))); case PMD_SIZE: - entry = pmd_pte(pmd_mkhuge(pte_pmd(entry))); - break; + return pmd_pte(pmd_mkhuge(pte_pmd(entry))); case CONT_PTE_SIZE: - entry = pte_mkcont(entry); - break; + return pte_mkcont(entry); default: - pr_warn("%s: unrecognized huge page size 0x%lx\n", - __func__, pagesize); break; } + pr_warn("%s: unrecognized huge page size 0x%lx\n", + __func__, pagesize); return entry; } diff --git a/arch/arm64/mm/kasan_init.c b/arch/arm64/mm/kasan_init.c index b65a29440a0c9..d541ce45daeb9 100644 --- a/arch/arm64/mm/kasan_init.c +++ b/arch/arm64/mm/kasan_init.c @@ -190,7 +190,7 @@ static void __init kasan_pgd_populate(unsigned long addr, unsigned long end, */ static bool __init root_level_aligned(u64 addr) { - int shift = (ARM64_HW_PGTABLE_LEVELS(vabits_actual) - 1) * (PAGE_SHIFT - 3); + int shift = (ARM64_HW_PGTABLE_LEVELS(vabits_actual) - 1) * PTDESC_TABLE_SHIFT; return (addr % (PAGE_SIZE << shift)) == 0; } @@ -245,7 +245,7 @@ static int __init root_level_idx(u64 addr) */ u64 vabits = IS_ENABLED(CONFIG_ARM64_64K_PAGES) ? 
VA_BITS : vabits_actual; - int shift = (ARM64_HW_PGTABLE_LEVELS(vabits) - 1) * (PAGE_SHIFT - 3); + int shift = (ARM64_HW_PGTABLE_LEVELS(vabits) - 1) * PTDESC_TABLE_SHIFT; return (addr & ~_PAGE_OFFSET(vabits)) >> (shift + PAGE_SHIFT); } @@ -269,7 +269,7 @@ static void __init clone_next_level(u64 addr, pgd_t *tmp_pg_dir, pud_t *pud) */ static int __init next_level_idx(u64 addr) { - int shift = (ARM64_HW_PGTABLE_LEVELS(vabits_actual) - 2) * (PAGE_SHIFT - 3); + int shift = (ARM64_HW_PGTABLE_LEVELS(vabits_actual) - 2) * PTDESC_TABLE_SHIFT; return (addr >> (shift + PAGE_SHIFT)) % PTRS_PER_PTE; } diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c index b4df5bc5b1b8b..3c54dea1303ff 100644 --- a/arch/arm64/mm/mmu.c +++ b/arch/arm64/mm/mmu.c @@ -1555,9 +1555,8 @@ void __cpu_replace_ttbr1(pgd_t *pgdp, bool cnp) #ifdef CONFIG_ARCH_HAS_PKEYS int arch_set_user_pkey_access(struct task_struct *tsk, int pkey, unsigned long init_val) { - u64 new_por = POE_RXW; + u64 new_por; u64 old_por; - u64 pkey_shift; if (!system_supports_poe()) return -ENOSPC; @@ -1571,7 +1570,7 @@ int arch_set_user_pkey_access(struct task_struct *tsk, int pkey, unsigned long i return -EINVAL; /* Set the bits we need in POR: */ - new_por = POE_RXW; + new_por = POE_RWX; if (init_val & PKEY_DISABLE_WRITE) new_por &= ~POE_W; if (init_val & PKEY_DISABLE_ACCESS) @@ -1582,12 +1581,11 @@ int arch_set_user_pkey_access(struct task_struct *tsk, int pkey, unsigned long i new_por &= ~POE_X; /* Shift the bits in to the correct place in POR for pkey: */ - pkey_shift = pkey * POR_BITS_PER_PKEY; - new_por <<= pkey_shift; + new_por = POR_ELx_PERM_PREP(pkey, new_por); /* Get old POR and mask off any old bits in place: */ old_por = read_sysreg_s(SYS_POR_EL0); - old_por &= ~(POE_MASK << pkey_shift); + old_por &= ~(POE_MASK << POR_ELx_PERM_SHIFT(pkey)); /* Write old part along with new part: */ write_sysreg_s(old_por | new_por, SYS_POR_EL0); diff --git a/arch/arm64/mm/physaddr.c b/arch/arm64/mm/physaddr.c index 
cde44c13dda1b..7d94e09b01b35 100644 --- a/arch/arm64/mm/physaddr.c +++ b/arch/arm64/mm/physaddr.c @@ -10,7 +10,7 @@ phys_addr_t __virt_to_phys(unsigned long x) { WARN(!__is_lm_address(__tag_reset(x)), - "virt_to_phys used for non-linear address: %pK (%pS)\n", + "virt_to_phys used for non-linear address: %p (%pS)\n", (void *)x, (void *)x); diff --git a/arch/arm64/mm/ptdump.c b/arch/arm64/mm/ptdump.c index 688fbe0271ca4..8cec0da4cff28 100644 --- a/arch/arm64/mm/ptdump.c +++ b/arch/arm64/mm/ptdump.c @@ -80,8 +80,8 @@ static const struct ptdump_prot_bits pte_bits[] = { .set = "CON", .clear = " ", }, { - .mask = PTE_TABLE_BIT | PTE_VALID, - .val = PTE_VALID, + .mask = PMD_TYPE_MASK, + .val = PMD_TYPE_SECT, .set = "BLK", .clear = " ", }, { diff --git a/arch/arm64/tools/gen-sysreg.awk b/arch/arm64/tools/gen-sysreg.awk index 1a2afc9fdd42e..f2a1732cb1f63 100755 --- a/arch/arm64/tools/gen-sysreg.awk +++ b/arch/arm64/tools/gen-sysreg.awk @@ -111,7 +111,7 @@ END { /^$/ { next } /^[\t ]*#/ { next } -/^SysregFields/ && block_current() == "Root" { +$1 == "SysregFields" && block_current() == "Root" { block_push("SysregFields") expect_fields(2) @@ -127,7 +127,8 @@ END { next } -/^EndSysregFields/ && block_current() == "SysregFields" { +$1 == "EndSysregFields" && block_current() == "SysregFields" { + expect_fields(1) if (next_bit > 0) fatal("Unspecified bits in " reg) @@ -145,7 +146,7 @@ END { next } -/^Sysreg/ && block_current() == "Root" { +$1 == "Sysreg" && block_current() == "Root" { block_push("Sysreg") expect_fields(7) @@ -177,7 +178,8 @@ END { next } -/^EndSysreg/ && block_current() == "Sysreg" { +$1 == "EndSysreg" && block_current() == "Sysreg" { + expect_fields(1) if (next_bit > 0) fatal("Unspecified bits in " reg) @@ -206,7 +208,7 @@ END { # Currently this is effectivey a comment, in future we may want to emit # defines for the fields. 
-(/^Fields/ || /^Mapping/) && block_current() == "Sysreg" { +($1 == "Fields" || $1 == "Mapping") && block_current() == "Sysreg" { expect_fields(2) if (next_bit != 63) @@ -224,7 +226,7 @@ END { } -/^Res0/ && (block_current() == "Sysreg" || block_current() == "SysregFields") { +$1 == "Res0" && (block_current() == "Sysreg" || block_current() == "SysregFields") { expect_fields(2) parse_bitdef(reg, "RES0", $2) field = "RES0_" msb "_" lsb @@ -234,7 +236,7 @@ END { next } -/^Res1/ && (block_current() == "Sysreg" || block_current() == "SysregFields") { +$1 == "Res1" && (block_current() == "Sysreg" || block_current() == "SysregFields") { expect_fields(2) parse_bitdef(reg, "RES1", $2) field = "RES1_" msb "_" lsb @@ -244,7 +246,7 @@ END { next } -/^Unkn/ && (block_current() == "Sysreg" || block_current() == "SysregFields") { +$1 == "Unkn" && (block_current() == "Sysreg" || block_current() == "SysregFields") { expect_fields(2) parse_bitdef(reg, "UNKN", $2) field = "UNKN_" msb "_" lsb @@ -254,7 +256,7 @@ END { next } -/^Field/ && (block_current() == "Sysreg" || block_current() == "SysregFields") { +$1 == "Field" && (block_current() == "Sysreg" || block_current() == "SysregFields") { expect_fields(3) field = $3 parse_bitdef(reg, field, $2) @@ -265,14 +267,14 @@ END { next } -/^Raz/ && (block_current() == "Sysreg" || block_current() == "SysregFields") { +$1 == "Raz" && (block_current() == "Sysreg" || block_current() == "SysregFields") { expect_fields(2) parse_bitdef(reg, field, $2) next } -/^SignedEnum/ && (block_current() == "Sysreg" || block_current() == "SysregFields") { +$1 == "SignedEnum" && (block_current() == "Sysreg" || block_current() == "SysregFields") { block_push("Enum") expect_fields(3) @@ -285,7 +287,7 @@ END { next } -/^UnsignedEnum/ && (block_current() == "Sysreg" || block_current() == "SysregFields") { +$1 == "UnsignedEnum" && (block_current() == "Sysreg" || block_current() == "SysregFields") { block_push("Enum") expect_fields(3) @@ -298,7 +300,7 @@ END { next } 
-/^Enum/ && (block_current() == "Sysreg" || block_current() == "SysregFields") { +$1 == "Enum" && (block_current() == "Sysreg" || block_current() == "SysregFields") { block_push("Enum") expect_fields(3) @@ -310,7 +312,8 @@ END { next } -/^EndEnum/ && block_current() == "Enum" { +$1 == "EndEnum" && block_current() == "Enum" { + expect_fields(1) field = null msb = null diff --git a/arch/arm64/tools/sysreg b/arch/arm64/tools/sysreg index 762ee084b37c5..10809047fc871 100644 --- a/arch/arm64/tools/sysreg +++ b/arch/arm64/tools/sysreg @@ -1664,6 +1664,7 @@ EndEnum UnsignedEnum 59:56 FGT 0b0000 NI 0b0001 IMP + 0b0010 FGT2 EndEnum Res0 55:48 UnsignedEnum 47:44 EXS @@ -1725,6 +1726,7 @@ Enum 3:0 PARANGE 0b0100 44 0b0101 48 0b0110 52 + 0b0111 56 EndEnum EndSysreg @@ -2074,7 +2076,7 @@ EndEnum Res0 4:2 Field 1 ExTRE Field 0 E0TRE -EndSysregFields +EndSysreg Sysreg SMPRI_EL1 3 0 1 2 4 Res0 63:4 @@ -2641,6 +2643,101 @@ Field 0 E0HTRE EndSysreg +Sysreg HDFGRTR2_EL2 3 4 3 1 0 +Res0 63:25 +Field 24 nPMBMAR_EL1 +Field 23 nMDSTEPOP_EL1 +Field 22 nTRBMPAM_EL1 +Res0 21 +Field 20 nTRCITECR_EL1 +Field 19 nPMSDSFR_EL1 +Field 18 nSPMDEVAFF_EL1 +Field 17 nSPMID +Field 16 nSPMSCR_EL1 +Field 15 nSPMACCESSR_EL1 +Field 14 nSPMCR_EL0 +Field 13 nSPMOVS +Field 12 nSPMINTEN +Field 11 nSPMCNTEN +Field 10 nSPMSELR_EL0 +Field 9 nSPMEVTYPERn_EL0 +Field 8 nSPMEVCNTRn_EL0 +Field 7 nPMSSCR_EL1 +Field 6 nPMSSDATA +Field 5 nMDSELR_EL1 +Field 4 nPMUACR_EL1 +Field 3 nPMICFILTR_EL0 +Field 2 nPMICNTR_EL0 +Field 1 nPMIAR_EL1 +Field 0 nPMECR_EL1 +EndSysreg + +Sysreg HDFGWTR2_EL2 3 4 3 1 1 +Res0 63:25 +Field 24 nPMBMAR_EL1 +Field 23 nMDSTEPOP_EL1 +Field 22 nTRBMPAM_EL1 +Field 21 nPMZR_EL0 +Field 20 nTRCITECR_EL1 +Field 19 nPMSDSFR_EL1 +Res0 18:17 +Field 16 nSPMSCR_EL1 +Field 15 nSPMACCESSR_EL1 +Field 14 nSPMCR_EL0 +Field 13 nSPMOVS +Field 12 nSPMINTEN +Field 11 nSPMCNTEN +Field 10 nSPMSELR_EL0 +Field 9 nSPMEVTYPERn_EL0 +Field 8 nSPMEVCNTRn_EL0 +Field 7 nPMSSCR_EL1 +Res0 6 +Field 5 nMDSELR_EL1 +Field 4 nPMUACR_EL1 
+Field 3 nPMICFILTR_EL0 +Field 2 nPMICNTR_EL0 +Field 1 nPMIAR_EL1 +Field 0 nPMECR_EL1 +EndSysreg + +Sysreg HFGRTR2_EL2 3 4 3 1 2 +Res0 63:15 +Field 14 nACTLRALIAS_EL1 +Field 13 nACTLRMASK_EL1 +Field 12 nTCR2ALIAS_EL1 +Field 11 nTCRALIAS_EL1 +Field 10 nSCTLRALIAS2_EL1 +Field 9 nSCTLRALIAS_EL1 +Field 8 nCPACRALIAS_EL1 +Field 7 nTCR2MASK_EL1 +Field 6 nTCRMASK_EL1 +Field 5 nSCTLR2MASK_EL1 +Field 4 nSCTLRMASK_EL1 +Field 3 nCPACRMASK_EL1 +Field 2 nRCWSMASK_EL1 +Field 1 nERXGSR_EL1 +Field 0 nPFAR_EL1 +EndSysreg + +Sysreg HFGWTR2_EL2 3 4 3 1 3 +Res0 63:15 +Field 14 nACTLRALIAS_EL1 +Field 13 nACTLRMASK_EL1 +Field 12 nTCR2ALIAS_EL1 +Field 11 nTCRALIAS_EL1 +Field 10 nSCTLRALIAS2_EL1 +Field 9 nSCTLRALIAS_EL1 +Field 8 nCPACRALIAS_EL1 +Field 7 nTCR2MASK_EL1 +Field 6 nTCRMASK_EL1 +Field 5 nSCTLR2MASK_EL1 +Field 4 nSCTLRMASK_EL1 +Field 3 nCPACRMASK_EL1 +Field 2 nRCWSMASK_EL1 +Res0 1 +Field 0 nPFAR_EL1 +EndSysreg + Sysreg HDFGRTR_EL2 3 4 3 1 4 Field 63 PMBIDR_EL1 Field 62 nPMSNEVFR_EL1 @@ -2813,6 +2910,12 @@ Field 1 AMEVCNTR00_EL0 Field 0 AMCNTEN0 EndSysreg +Sysreg HFGITR2_EL2 3 4 3 1 7 +Res0 63:2 +Field 1 nDCCIVAPS +Field 0 TSBCSYNC +EndSysreg + Sysreg ZCR_EL2 3 4 1 2 0 Fields ZCR_ELx EndSysreg diff --git a/arch/x86/kernel/cpu/aperfmperf.c b/arch/x86/kernel/cpu/aperfmperf.c index f642de2ebdac8..6cf31a1649c4b 100644 --- a/arch/x86/kernel/cpu/aperfmperf.c +++ b/arch/x86/kernel/cpu/aperfmperf.c @@ -498,7 +498,7 @@ void arch_scale_freq_tick(void) */ #define MAX_SAMPLE_AGE ((unsigned long)HZ / 50) -unsigned int arch_freq_get_on_cpu(int cpu) +int arch_freq_get_on_cpu(int cpu) { struct aperfmperf *s = per_cpu_ptr(&cpu_samples, cpu); unsigned int seq, freq; diff --git a/arch/x86/kernel/cpu/proc.c b/arch/x86/kernel/cpu/proc.c index 41ed01f46bd92..6571d432cbe33 100644 --- a/arch/x86/kernel/cpu/proc.c +++ b/arch/x86/kernel/cpu/proc.c @@ -86,9 +86,12 @@ static int show_cpuinfo(struct seq_file *m, void *v) seq_printf(m, "microcode\t: 0x%x\n", c->microcode); if (cpu_has(c, X86_FEATURE_TSC)) { - 
unsigned int freq = arch_freq_get_on_cpu(cpu); + int freq = arch_freq_get_on_cpu(cpu); - seq_printf(m, "cpu MHz\t\t: %u.%03u\n", freq / 1000, (freq % 1000)); + if (freq < 0) + seq_puts(m, "cpu MHz\t\t: Unknown\n"); + else + seq_printf(m, "cpu MHz\t\t: %u.%03u\n", freq / 1000, (freq % 1000)); } /* Cache size */ diff --git a/drivers/base/arch_topology.c b/drivers/base/arch_topology.c index 3ebe77566788b..8202447eb4300 100644 --- a/drivers/base/arch_topology.c +++ b/drivers/base/arch_topology.c @@ -28,7 +28,7 @@ static DEFINE_PER_CPU(struct scale_freq_data __rcu *, sft_data); static struct cpumask scale_freq_counters_mask; static bool scale_freq_invariant; -DEFINE_PER_CPU(unsigned long, capacity_freq_ref) = 1; +DEFINE_PER_CPU(unsigned long, capacity_freq_ref) = 0; EXPORT_PER_CPU_SYMBOL_GPL(capacity_freq_ref); static bool supports_scale_freq_counters(const struct cpumask *cpus) @@ -293,13 +293,15 @@ void topology_normalize_cpu_scale(void) capacity_scale = 1; for_each_possible_cpu(cpu) { - capacity = raw_capacity[cpu] * per_cpu(capacity_freq_ref, cpu); + capacity = raw_capacity[cpu] * + (per_cpu(capacity_freq_ref, cpu) ?: 1); capacity_scale = max(capacity, capacity_scale); } pr_debug("cpu_capacity: capacity_scale=%llu\n", capacity_scale); for_each_possible_cpu(cpu) { - capacity = raw_capacity[cpu] * per_cpu(capacity_freq_ref, cpu); + capacity = raw_capacity[cpu] * + (per_cpu(capacity_freq_ref, cpu) ?: 1); capacity = div64_u64(capacity << SCHED_CAPACITY_SHIFT, capacity_scale); topology_set_cpu_scale(cpu, capacity); diff --git a/drivers/cpufreq/Kconfig.x86 b/drivers/cpufreq/Kconfig.x86 index 97c2d4f15d76e..2c5c228408bf2 100644 --- a/drivers/cpufreq/Kconfig.x86 +++ b/drivers/cpufreq/Kconfig.x86 @@ -340,3 +340,15 @@ config X86_SPEEDSTEP_RELAXED_CAP_CHECK option lets the probing code bypass some of those checks if the parameter "relaxed_check=1" is passed to the module. 
+config CPUFREQ_ARCH_CUR_FREQ + default y + bool "Current frequency derived from HW provided feedback" + help + This determines whether the scaling_cur_freq sysfs attribute returns + the last requested frequency or a more precise value based on hardware + provided feedback (as architected counters). + Given that a more precise frequency can now be provided via the + cpuinfo_avg_freq attribute, by enabling this option, + scaling_cur_freq maintains the provision of a counter based frequency, + for compatibility reasons. + diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c index 30ffbddc7ecec..0ce79fed8e55d 100644 --- a/drivers/cpufreq/cpufreq.c +++ b/drivers/cpufreq/cpufreq.c @@ -729,18 +729,26 @@ show_one(cpuinfo_transition_latency, cpuinfo.transition_latency); show_one(scaling_min_freq, min); show_one(scaling_max_freq, max); -__weak unsigned int arch_freq_get_on_cpu(int cpu) +__weak int arch_freq_get_on_cpu(int cpu) { - return 0; + return -EOPNOTSUPP; +} + +static inline bool cpufreq_avg_freq_supported(struct cpufreq_policy *policy) +{ + return arch_freq_get_on_cpu(policy->cpu) != -EOPNOTSUPP; } static ssize_t show_scaling_cur_freq(struct cpufreq_policy *policy, char *buf) { ssize_t ret; - unsigned int freq; + int freq; + + freq = IS_ENABLED(CONFIG_CPUFREQ_ARCH_CUR_FREQ) + ? 
arch_freq_get_on_cpu(policy->cpu) + : 0; - freq = arch_freq_get_on_cpu(policy->cpu); - if (freq) + if (freq > 0) ret = sysfs_emit(buf, "%u\n", freq); else if (cpufreq_driver->setpolicy && cpufreq_driver->get) ret = sysfs_emit(buf, "%u\n", cpufreq_driver->get(policy->cpu)); @@ -784,6 +792,19 @@ static ssize_t show_cpuinfo_cur_freq(struct cpufreq_policy *policy, return sysfs_emit(buf, "\n"); } +/* + * show_cpuinfo_avg_freq - average CPU frequency as detected by hardware + */ +static ssize_t show_cpuinfo_avg_freq(struct cpufreq_policy *policy, + char *buf) +{ + int avg_freq = arch_freq_get_on_cpu(policy->cpu); + + if (avg_freq > 0) + return sysfs_emit(buf, "%u\n", avg_freq); + return avg_freq != 0 ? avg_freq : -EINVAL; +} + /* * show_scaling_governor - show the current policy for the specified CPU */ @@ -946,6 +967,7 @@ static ssize_t show_bios_limit(struct cpufreq_policy *policy, char *buf) } cpufreq_freq_attr_ro_perm(cpuinfo_cur_freq, 0400); +cpufreq_freq_attr_ro(cpuinfo_avg_freq); cpufreq_freq_attr_ro(cpuinfo_min_freq); cpufreq_freq_attr_ro(cpuinfo_max_freq); cpufreq_freq_attr_ro(cpuinfo_transition_latency); @@ -1073,6 +1095,12 @@ static int cpufreq_add_dev_interface(struct cpufreq_policy *policy) return ret; } + if (cpufreq_avg_freq_supported(policy)) { + ret = sysfs_create_file(&policy->kobj, &cpuinfo_avg_freq.attr); + if (ret) + return ret; + } + ret = sysfs_create_file(&policy->kobj, &scaling_cur_freq.attr); if (ret) return ret; diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h index 7fe0981a7e467..02fd4746231da 100644 --- a/include/linux/cpufreq.h +++ b/include/linux/cpufreq.h @@ -1184,7 +1184,7 @@ static inline int of_perf_domain_get_sharing_cpumask(int pcpu, const char *list_ } #endif -extern unsigned int arch_freq_get_on_cpu(int cpu); +extern int arch_freq_get_on_cpu(int cpu); #ifndef arch_set_freq_scale static __always_inline diff --git a/include/linux/dma-direct.h b/include/linux/dma-direct.h index d7e30d4f7503a..f3bc0bcd70980 100644 --- 
a/include/linux/dma-direct.h +++ b/include/linux/dma-direct.h @@ -78,14 +78,18 @@ static inline dma_addr_t dma_range_map_max(const struct bus_dma_region *map) #define phys_to_dma_unencrypted phys_to_dma #endif #else -static inline dma_addr_t phys_to_dma_unencrypted(struct device *dev, - phys_addr_t paddr) +static inline dma_addr_t __phys_to_dma(struct device *dev, phys_addr_t paddr) { if (dev->dma_range_map) return translate_phys_to_dma(dev, paddr); return paddr; } +static inline dma_addr_t phys_to_dma_unencrypted(struct device *dev, + phys_addr_t paddr) +{ + return dma_addr_unencrypted(__phys_to_dma(dev, paddr)); +} /* * If memory encryption is supported, phys_to_dma will set the memory encryption * bit in the DMA address, and dma_to_phys will clear it. @@ -94,19 +98,20 @@ static inline dma_addr_t phys_to_dma_unencrypted(struct device *dev, */ static inline dma_addr_t phys_to_dma(struct device *dev, phys_addr_t paddr) { - return __sme_set(phys_to_dma_unencrypted(dev, paddr)); + return dma_addr_encrypted(__phys_to_dma(dev, paddr)); } static inline phys_addr_t dma_to_phys(struct device *dev, dma_addr_t dma_addr) { phys_addr_t paddr; + dma_addr = dma_addr_canonical(dma_addr); if (dev->dma_range_map) paddr = translate_dma_to_phys(dev, dma_addr); else paddr = dma_addr; - return __sme_clr(paddr); + return paddr; } #endif /* !CONFIG_ARCH_HAS_PHYS_TO_DMA */ diff --git a/include/linux/mem_encrypt.h b/include/linux/mem_encrypt.h index ae45263892611..07584c5e36fb4 100644 --- a/include/linux/mem_encrypt.h +++ b/include/linux/mem_encrypt.h @@ -26,11 +26,34 @@ */ #define __sme_set(x) ((x) | sme_me_mask) #define __sme_clr(x) ((x) & ~sme_me_mask) + +#define dma_addr_encrypted(x) __sme_set(x) +#define dma_addr_canonical(x) __sme_clr(x) + #else #define __sme_set(x) (x) #define __sme_clr(x) (x) #endif +/* + * dma_addr_encrypted() and dma_addr_unencrypted() are for converting a given DMA + * address to the respective type of addressing. 
+ * + * dma_addr_canonical() is used to reverse any conversions for encrypted/decrypted + * back to the canonical address. + */ +#ifndef dma_addr_encrypted +#define dma_addr_encrypted(x) (x) +#endif + +#ifndef dma_addr_unencrypted +#define dma_addr_unencrypted(x) (x) +#endif + +#ifndef dma_addr_canonical +#define dma_addr_canonical(x) (x) +#endif + #endif /* __ASSEMBLY__ */ #endif /* __MEM_ENCRYPT_H__ */ diff --git a/include/uapi/asm-generic/mman-common.h b/include/uapi/asm-generic/mman-common.h index 1ea2c4c33b86a..ef1c27fa3c570 100644 --- a/include/uapi/asm-generic/mman-common.h +++ b/include/uapi/asm-generic/mman-common.h @@ -85,6 +85,7 @@ /* compatibility flags */ #define MAP_FILE 0 +#define PKEY_UNRESTRICTED 0x0 #define PKEY_DISABLE_ACCESS 0x1 #define PKEY_DISABLE_WRITE 0x2 #define PKEY_ACCESS_MASK (PKEY_DISABLE_ACCESS |\ diff --git a/tools/testing/selftests/arm64/mte/check_hugetlb_options.c b/tools/testing/selftests/arm64/mte/check_hugetlb_options.c index 303260a6dc65b..3bfcd3848432b 100644 --- a/tools/testing/selftests/arm64/mte/check_hugetlb_options.c +++ b/tools/testing/selftests/arm64/mte/check_hugetlb_options.c @@ -227,6 +227,8 @@ static int check_child_hugetlb_memory_mapping(int mem_type, int mode, int mappin int main(int argc, char *argv[]) { int err; + void *map_ptr; + unsigned long map_size; err = mte_default_setup(); if (err) @@ -243,6 +245,15 @@ int main(int argc, char *argv[]) return KSFT_FAIL; } + /* Check if MTE supports hugetlb mappings */ + map_size = default_huge_page_size(); + map_ptr = mmap(NULL, map_size, PROT_READ | PROT_MTE, + MAP_PRIVATE | MAP_ANONYMOUS | MAP_HUGETLB, -1, 0); + if (map_ptr == MAP_FAILED) + ksft_exit_skip("PROT_MTE not supported with MAP_HUGETLB mappings\n"); + else + munmap(map_ptr, map_size); + /* Set test plan */ ksft_set_plan(12); @@ -270,13 +281,13 @@ int main(int argc, char *argv[]) "Check clear PROT_MTE flags with private mapping and sync error mode and mmap/mprotect memory\n"); 
evaluate_test(check_child_hugetlb_memory_mapping(USE_MMAP, MTE_SYNC_ERR, MAP_PRIVATE | MAP_HUGETLB), - "Check child hugetlb memory with private mapping, precise mode and mmap memory\n"); + "Check child hugetlb memory with private mapping, sync error mode and mmap memory\n"); evaluate_test(check_child_hugetlb_memory_mapping(USE_MMAP, MTE_ASYNC_ERR, MAP_PRIVATE | MAP_HUGETLB), - "Check child hugetlb memory with private mapping, precise mode and mmap memory\n"); + "Check child hugetlb memory with private mapping, async error mode and mmap memory\n"); evaluate_test(check_child_hugetlb_memory_mapping(USE_MPROTECT, MTE_SYNC_ERR, MAP_PRIVATE | MAP_HUGETLB), - "Check child hugetlb memory with private mapping, precise mode and mmap/mprotect memory\n"); + "Check child hugetlb memory with private mapping, sync error mode and mmap/mprotect memory\n"); evaluate_test(check_child_hugetlb_memory_mapping(USE_MPROTECT, MTE_ASYNC_ERR, MAP_PRIVATE | MAP_HUGETLB), - "Check child hugetlb memory with private mapping, precise mode and mmap/mprotect memory\n"); + "Check child hugetlb memory with private mapping, async error mode and mmap/mprotect memory\n"); mte_restore_setup(); free_hugetlb(); diff --git a/tools/testing/selftests/mm/mseal_test.c b/tools/testing/selftests/mm/mseal_test.c index ad17005521a8e..005f29c86484e 100644 --- a/tools/testing/selftests/mm/mseal_test.c +++ b/tools/testing/selftests/mm/mseal_test.c @@ -218,7 +218,7 @@ bool seal_support(void) bool pkey_supported(void) { #if defined(__i386__) || defined(__x86_64__) /* arch */ - int pkey = sys_pkey_alloc(0, 0); + int pkey = sys_pkey_alloc(0, PKEY_UNRESTRICTED); if (pkey > 0) return true; @@ -1671,7 +1671,7 @@ static void test_seal_discard_ro_anon_on_pkey(bool seal) setup_single_address_rw(size, &ptr); FAIL_TEST_IF_FALSE(ptr != (void *)-1); - pkey = sys_pkey_alloc(0, 0); + pkey = sys_pkey_alloc(0, PKEY_UNRESTRICTED); FAIL_TEST_IF_FALSE(pkey > 0); ret = sys_mprotect_pkey((void *)ptr, size, PROT_READ | PROT_WRITE, pkey); @@ 
-1683,7 +1683,7 @@ static void test_seal_discard_ro_anon_on_pkey(bool seal) } /* sealing doesn't take effect if PKRU allow write. */ - set_pkey(pkey, 0); + set_pkey(pkey, PKEY_UNRESTRICTED); ret = sys_madvise(ptr, size, MADV_DONTNEED); FAIL_TEST_IF_FALSE(!ret); diff --git a/tools/testing/selftests/mm/pkey-helpers.h b/tools/testing/selftests/mm/pkey-helpers.h index f080e97b39bea..ea404f80e6cb9 100644 --- a/tools/testing/selftests/mm/pkey-helpers.h +++ b/tools/testing/selftests/mm/pkey-helpers.h @@ -13,6 +13,7 @@ #include #include +#include #include #include "../kselftest.h" @@ -193,7 +194,7 @@ static inline u32 *siginfo_get_pkey_ptr(siginfo_t *si) static inline int kernel_has_pkeys(void) { /* try allocating a key and see if it succeeds */ - int ret = sys_pkey_alloc(0, 0); + int ret = sys_pkey_alloc(0, PKEY_UNRESTRICTED); if (ret <= 0) { return 0; } diff --git a/tools/testing/selftests/mm/pkey_sighandler_tests.c b/tools/testing/selftests/mm/pkey_sighandler_tests.c index 1ac8c88098807..b5e076a564c95 100644 --- a/tools/testing/selftests/mm/pkey_sighandler_tests.c +++ b/tools/testing/selftests/mm/pkey_sighandler_tests.c @@ -311,7 +311,7 @@ static void test_sigsegv_handler_with_different_pkey_for_stack(void) __write_pkey_reg(pkey_reg); /* Protect the new stack with MPK 1 */ - pkey = sys_pkey_alloc(0, 0); + pkey = sys_pkey_alloc(0, PKEY_UNRESTRICTED); sys_mprotect_pkey(stack, STACK_SIZE, PROT_READ | PROT_WRITE, pkey); /* Set up alternate signal stack that will use the default MPK */ @@ -484,7 +484,7 @@ static void test_pkru_sigreturn(void) __write_pkey_reg(pkey_reg); /* Protect the stack with MPK 2 */ - pkey = sys_pkey_alloc(0, 0); + pkey = sys_pkey_alloc(0, PKEY_UNRESTRICTED); sys_mprotect_pkey(stack, STACK_SIZE, PROT_READ | PROT_WRITE, pkey); /* Set up alternate signal stack that will use the default MPK */ diff --git a/tools/testing/selftests/mm/protection_keys.c b/tools/testing/selftests/mm/protection_keys.c index a4683f2476f27..434d8a8dc637c 100644 --- 
a/tools/testing/selftests/mm/protection_keys.c +++ b/tools/testing/selftests/mm/protection_keys.c @@ -463,7 +463,7 @@ static pid_t fork_lazy_child(void) static int alloc_pkey(void) { int ret; - unsigned long init_val = 0x0; + unsigned long init_val = PKEY_UNRESTRICTED; dprintf1("%s()::%d, pkey_reg: 0x%016llx shadow: %016llx\n", __func__, __LINE__, __read_pkey_reg(), shadow_pkey_reg); diff --git a/tools/testing/selftests/powerpc/include/pkeys.h b/tools/testing/selftests/powerpc/include/pkeys.h index 3a0129467de64..d6deb6ffa1b9b 100644 --- a/tools/testing/selftests/powerpc/include/pkeys.h +++ b/tools/testing/selftests/powerpc/include/pkeys.h @@ -24,6 +24,9 @@ #undef PKEY_DISABLE_EXECUTE #define PKEY_DISABLE_EXECUTE 0x4 +#undef PKEY_UNRESTRICTED +#define PKEY_UNRESTRICTED 0x0 + /* Older versions of libc do not define this */ #ifndef SEGV_PKUERR #define SEGV_PKUERR 4 @@ -93,7 +96,7 @@ int pkeys_unsupported(void) SKIP_IF(!hash_mmu); /* Check if the system call is supported */ - pkey = sys_pkey_alloc(0, 0); + pkey = sys_pkey_alloc(0, PKEY_UNRESTRICTED); SKIP_IF(pkey < 0); sys_pkey_free(pkey); diff --git a/tools/testing/selftests/powerpc/mm/pkey_exec_prot.c b/tools/testing/selftests/powerpc/mm/pkey_exec_prot.c index 0af4f02669a11..29b91b7456eb9 100644 --- a/tools/testing/selftests/powerpc/mm/pkey_exec_prot.c +++ b/tools/testing/selftests/powerpc/mm/pkey_exec_prot.c @@ -72,7 +72,7 @@ static void segv_handler(int signum, siginfo_t *sinfo, void *ctx) switch (fault_type) { case PKEY_DISABLE_ACCESS: - pkey_set_rights(fault_pkey, 0); + pkey_set_rights(fault_pkey, PKEY_UNRESTRICTED); break; case PKEY_DISABLE_EXECUTE: /* diff --git a/tools/testing/selftests/powerpc/mm/pkey_siginfo.c b/tools/testing/selftests/powerpc/mm/pkey_siginfo.c index 2db76e56d4cb9..e89a164c686ba 100644 --- a/tools/testing/selftests/powerpc/mm/pkey_siginfo.c +++ b/tools/testing/selftests/powerpc/mm/pkey_siginfo.c @@ -83,7 +83,7 @@ static void segv_handler(int signum, siginfo_t *sinfo, void *ctx) 
mprotect(pgstart, pgsize, PROT_EXEC)) _exit(1); else - pkey_set_rights(pkey, 0); + pkey_set_rights(pkey, PKEY_UNRESTRICTED); fault_count++; } diff --git a/tools/testing/selftests/powerpc/ptrace/core-pkey.c b/tools/testing/selftests/powerpc/ptrace/core-pkey.c index f061434af452b..7ff53caeb4aa9 100644 --- a/tools/testing/selftests/powerpc/ptrace/core-pkey.c +++ b/tools/testing/selftests/powerpc/ptrace/core-pkey.c @@ -95,16 +95,16 @@ static int child(struct shared_info *info) /* Get some pkeys so that we can change their bits in the AMR. */ pkey1 = sys_pkey_alloc(0, PKEY_DISABLE_EXECUTE); if (pkey1 < 0) { - pkey1 = sys_pkey_alloc(0, 0); + pkey1 = sys_pkey_alloc(0, PKEY_UNRESTRICTED); FAIL_IF(pkey1 < 0); disable_execute = false; } - pkey2 = sys_pkey_alloc(0, 0); + pkey2 = sys_pkey_alloc(0, PKEY_UNRESTRICTED); FAIL_IF(pkey2 < 0); - pkey3 = sys_pkey_alloc(0, 0); + pkey3 = sys_pkey_alloc(0, PKEY_UNRESTRICTED); FAIL_IF(pkey3 < 0); info->amr |= 3ul << pkeyshift(pkey1) | 2ul << pkeyshift(pkey2); diff --git a/tools/testing/selftests/powerpc/ptrace/ptrace-pkey.c b/tools/testing/selftests/powerpc/ptrace/ptrace-pkey.c index fc633014424f7..10f63042cf91b 100644 --- a/tools/testing/selftests/powerpc/ptrace/ptrace-pkey.c +++ b/tools/testing/selftests/powerpc/ptrace/ptrace-pkey.c @@ -57,16 +57,16 @@ static int child(struct shared_info *info) /* Get some pkeys so that we can change their bits in the AMR. */ pkey1 = sys_pkey_alloc(0, PKEY_DISABLE_EXECUTE); if (pkey1 < 0) { - pkey1 = sys_pkey_alloc(0, 0); + pkey1 = sys_pkey_alloc(0, PKEY_UNRESTRICTED); CHILD_FAIL_IF(pkey1 < 0, &info->child_sync); disable_execute = false; } - pkey2 = sys_pkey_alloc(0, 0); + pkey2 = sys_pkey_alloc(0, PKEY_UNRESTRICTED); CHILD_FAIL_IF(pkey2 < 0, &info->child_sync); - pkey3 = sys_pkey_alloc(0, 0); + pkey3 = sys_pkey_alloc(0, PKEY_UNRESTRICTED); CHILD_FAIL_IF(pkey3 < 0, &info->child_sync); info->amr1 |= 3ul << pkeyshift(pkey1);