Merge tag 'x86_urgent_for_v5.13-rc5' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull x86 fixes from Borislav Petkov:
 "A bunch of x86/urgent stuff accumulated for the last two weeks so
  lemme unload it to you.

  It should be all totally risk-free, of course. :-)

   - Fix out-of-spec hardware (1st gen Hygon) which does not implement
     MSR_AMD64_SEV even though the spec requires it, by checking the
     CPUID SME/SEV bits before accessing the MSR.

   - Send only one SIGSEGV to a task when the si_code is SEGV_PKUERR,
     instead of two.

   - Do away with all the wankery of reserving X amount of memory in the
     first megabyte to prevent BIOS corrupting it and simply and
     unconditionally reserve the whole first megabyte.

   - Make alternatives NOP optimization work at an arbitrary position
     within the patched sequence because the compiler can put
     single-byte NOPs for alignment anywhere in the sequence (32-bit
     retpoline), vs our previous assumption that the NOPs are only
     appended.

   - Force-disable support for the ENQCMD[S] instructions and remove
     update_pasid() because of insufficient protection against FPU state
     modification in interrupt context, among other xstate horrors which
     are being addressed at the moment. This one limits the fallout
     until proper enablement.

   - Use cpu_feature_enabled() in the idxd driver so that it can be
     build-time disabled through the defines in disabled-features.h.

   - Fix LVT thermal setup for SMI delivery mode by making sure the APIC
     LVT value is read before APIC initialization, so that boot-time
     softlockups (observed on at least one machine) do not happen.

   - Mark all legacy interrupts as legacy vectors when the IO-APIC is
     disabled and when all legacy interrupts are routed through the PIC"

* tag 'x86_urgent_for_v5.13-rc5' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
  x86/sev: Check SME/SEV support in CPUID first
  x86/fault: Don't send SIGSEGV twice on SEGV_PKUERR
  x86/setup: Always reserve the first 1M of RAM
  x86/alternative: Optimize single-byte NOPs at an arbitrary position
  x86/cpufeatures: Force disable X86_FEATURE_ENQCMD and remove update_pasid()
  dmaengine: idxd: Use cpu_feature_enabled()
  x86/thermal: Fix LVT thermal setup for SMI delivery mode
  x86/apic: Mark _all_ legacy interrupts when IO/APIC is missing
Linus Torvalds committed Jun 6, 2021
2 parents f5b6eb1 + 009767d commit 773ac53
Showing 16 changed files with 145 additions and 126 deletions.
arch/x86/include/asm/apic.h: 1 addition, 0 deletions

@@ -174,6 +174,7 @@ static inline int apic_is_clustered_box(void)
 extern int setup_APIC_eilvt(u8 lvt_off, u8 vector, u8 msg_type, u8 mask);
 extern void lapic_assign_system_vectors(void);
 extern void lapic_assign_legacy_vector(unsigned int isairq, bool replace);
+extern void lapic_update_legacy_vectors(void);
 extern void lapic_online(void);
 extern void lapic_offline(void);
 extern bool apic_needs_pit(void);
arch/x86/include/asm/disabled-features.h: 2 additions, 5 deletions

@@ -56,11 +56,8 @@
 # define DISABLE_PTI		(1 << (X86_FEATURE_PTI & 31))
 #endif
 
-#ifdef CONFIG_IOMMU_SUPPORT
-# define DISABLE_ENQCMD	0
-#else
-# define DISABLE_ENQCMD	(1 << (X86_FEATURE_ENQCMD & 31))
-#endif
+/* Force disable because it's broken beyond repair */
+#define DISABLE_ENQCMD	(1 << (X86_FEATURE_ENQCMD & 31))
 
 #ifdef CONFIG_X86_SGX
 # define DISABLE_SGX	0
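For context on how this force-disable takes effect: a bit set in the disabled-features mask makes cpu_feature_enabled() compile-time false, so the compiler can drop code guarded by it. Below is a minimal, self-contained sketch of that mechanism; the names and bit number are illustrative, not the kernel's actual macros (which spread the masks across DISABLED_MASK0..N, one word per CPUID leaf):

#include <stdio.h>

#define FEATURE_ENQCMD   5u                          /* illustrative bit number */
#define DISABLE_ENQCMD   (1u << FEATURE_ENQCMD)      /* force disable */
#define DISABLED_MASK    DISABLE_ENQCMD              /* OR of all disabled bits */

static unsigned int cpuid_bits = ~0u;                /* pretend the CPU has everything */

static int cpu_feature_enabled_sketch(unsigned int feature)
{
        /*
         * With a constant 'feature' argument this test folds to a constant
         * after inlining, so the dependent code can be eliminated entirely.
         */
        if (DISABLED_MASK & (1u << feature))
                return 0;
        return !!(cpuid_bits & (1u << feature));
}

int main(void)
{
        /* Prints 0 even though the (pretend) CPU advertises the feature. */
        printf("ENQCMD: %d\n", cpu_feature_enabled_sketch(FEATURE_ENQCMD));
        return 0;
}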
arch/x86/include/asm/fpu/api.h: 1 addition, 5 deletions

@@ -106,10 +106,6 @@ extern int cpu_has_xfeatures(u64 xfeatures_mask, const char **feature_name);
  */
 #define PASID_DISABLED	0
 
-#ifdef CONFIG_IOMMU_SUPPORT
-/* Update current's PASID MSR/state by mm's PASID. */
-void update_pasid(void);
-#else
 static inline void update_pasid(void) { }
-#endif
 
 #endif /* _ASM_X86_FPU_API_H */
arch/x86/include/asm/fpu/internal.h: 0 additions, 7 deletions

@@ -584,13 +584,6 @@ static inline void switch_fpu_finish(struct fpu *new_fpu)
 		pkru_val = pk->pkru;
 	}
 	__write_pkru(pkru_val);
-
-	/*
-	 * Expensive PASID MSR write will be avoided in update_pasid() because
-	 * TIF_NEED_FPU_LOAD was set. And the PASID state won't be updated
-	 * unless it's different from mm->pasid to reduce overhead.
-	 */
-	update_pasid();
 }
 
 #endif /* _ASM_X86_FPU_INTERNAL_H */
arch/x86/include/asm/thermal.h: 3 additions, 1 deletion

@@ -3,11 +3,13 @@
 #define _ASM_X86_THERMAL_H
 
 #ifdef CONFIG_X86_THERMAL_VECTOR
+void therm_lvt_init(void);
 void intel_init_thermal(struct cpuinfo_x86 *c);
 bool x86_thermal_enabled(void);
 void intel_thermal_interrupt(void);
 #else
-static inline void intel_init_thermal(struct cpuinfo_x86 *c) { }
+static inline void therm_lvt_init(void) { }
+static inline void intel_init_thermal(struct cpuinfo_x86 *c) { }
 #endif
 
 #endif /* _ASM_X86_THERMAL_H */
arch/x86/kernel/alternative.c: 46 additions, 18 deletions

@@ -182,42 +182,70 @@ recompute_jump(struct alt_instr *a, u8 *orig_insn, u8 *repl_insn, u8 *insn_buff)
 		   n_dspl, (unsigned long)orig_insn + n_dspl + repl_len);
 }
 
+/*
+ * optimize_nops_range() - Optimize a sequence of single byte NOPs (0x90)
+ *
+ * @instr: instruction byte stream
+ * @instrlen: length of the above
+ * @off: offset within @instr where the first NOP has been detected
+ *
+ * Return: number of NOPs found (and replaced).
+ */
+static __always_inline int optimize_nops_range(u8 *instr, u8 instrlen, int off)
+{
+	unsigned long flags;
+	int i = off, nnops;
+
+	while (i < instrlen) {
+		if (instr[i] != 0x90)
+			break;
+
+		i++;
+	}
+
+	nnops = i - off;
+
+	if (nnops <= 1)
+		return nnops;
+
+	local_irq_save(flags);
+	add_nops(instr + off, nnops);
+	local_irq_restore(flags);
+
+	DUMP_BYTES(instr, instrlen, "%px: [%d:%d) optimized NOPs: ", instr, off, i);
+
+	return nnops;
+}
+
 /*
  * "noinline" to cause control flow change and thus invalidate I$ and
  * cause refetch after modification.
  */
 static void __init_or_module noinline optimize_nops(struct alt_instr *a, u8 *instr)
 {
-	unsigned long flags;
 	struct insn insn;
-	int nop, i = 0;
+	int i = 0;
 
 	/*
-	 * Jump over the non-NOP insns, the remaining bytes must be single-byte
-	 * NOPs, optimize them.
+	 * Jump over the non-NOP insns and optimize single-byte NOPs into bigger
+	 * ones.
 	 */
 	for (;;) {
 		if (insn_decode_kernel(&insn, &instr[i]))
 			return;
 
+		/*
+		 * See if this and any potentially following NOPs can be
+		 * optimized.
+		 */
 		if (insn.length == 1 && insn.opcode.bytes[0] == 0x90)
-			break;
-
-		if ((i += insn.length) >= a->instrlen)
-			return;
-	}
+			i += optimize_nops_range(instr, a->instrlen, i);
+		else
+			i += insn.length;
 
-	for (nop = i; i < a->instrlen; i++) {
-		if (WARN_ONCE(instr[i] != 0x90, "Not a NOP at 0x%px\n", &instr[i]))
+		if (i >= a->instrlen)
 			return;
 	}
-
-	local_irq_save(flags);
-	add_nops(instr + nop, i - nop);
-	local_irq_restore(flags);
-
-	DUMP_BYTES(instr, a->instrlen, "%px: [%d:%d) optimized NOPs: ",
-		   instr, nop, a->instrlen);
 }
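As an illustration of the new logic, here is a self-contained userspace model of the optimize_nops_range() rewrite, assuming runs of single-byte NOPs (0x90) can now sit anywhere in the patched sequence (e.g. compiler-inserted alignment padding at a 32-bit retpoline site), not only at the end. The multi-byte NOP encodings are the architecturally recommended ones; everything else (buffer contents, function shape) is illustrative rather than the kernel's exact code, which additionally masks IRQs around the rewrite and dumps the patched bytes:

#include <stdio.h>
#include <string.h>

/* Recommended x86 multi-byte NOPs; nops[n - 1] is the n-byte encoding. */
static const unsigned char nops[8][8] = {
        { 0x90 },
        { 0x66, 0x90 },
        { 0x0f, 0x1f, 0x00 },
        { 0x0f, 0x1f, 0x40, 0x00 },
        { 0x0f, 0x1f, 0x44, 0x00, 0x00 },
        { 0x66, 0x0f, 0x1f, 0x44, 0x00, 0x00 },
        { 0x0f, 0x1f, 0x80, 0x00, 0x00, 0x00, 0x00 },
        { 0x0f, 0x1f, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00 },
};

/* Collapse the run of single-byte NOPs starting at 'off'; returns its length. */
static int optimize_nops_range_model(unsigned char *instr, int instrlen, int off)
{
        int i = off, nnops, len, n;

        while (i < instrlen && instr[i] == 0x90)
                i++;

        nnops = i - off;
        if (nnops <= 1)
                return nnops;

        /* Rewrite the run with the largest NOP encodings that fit. */
        for (len = nnops; len > 0; len -= n, off += n) {
                n = len > 8 ? 8 : len;
                memcpy(instr + off, nops[n - 1], (size_t)n);
        }
        return nnops;
}

int main(void)
{
        /* mov eax,ecx; three padding NOPs in the middle; ret */
        unsigned char buf[] = { 0x89, 0xc8, 0x90, 0x90, 0x90, 0xc3 };
        size_t i;

        optimize_nops_range_model(buf, (int)sizeof(buf), 2);
        for (i = 0; i < sizeof(buf); i++)
                printf("%02x ", buf[i]);   /* prints: 89 c8 0f 1f 00 c3 */
        printf("\n");
        return 0;
}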
arch/x86/kernel/apic/apic.c: 1 addition, 0 deletions

@@ -2604,6 +2604,7 @@ static void __init apic_bsp_setup(bool upmode)
 	end_local_APIC_setup();
 	irq_remap_enable_fault_handling();
 	setup_IO_APIC();
+	lapic_update_legacy_vectors();
 }
 
 #ifdef CONFIG_UP_LATE_INIT
arch/x86/kernel/apic/vector.c: 20 additions, 0 deletions

@@ -738,6 +738,26 @@ void lapic_assign_legacy_vector(unsigned int irq, bool replace)
 	irq_matrix_assign_system(vector_matrix, ISA_IRQ_VECTOR(irq), replace);
 }
 
+void __init lapic_update_legacy_vectors(void)
+{
+	unsigned int i;
+
+	if (IS_ENABLED(CONFIG_X86_IO_APIC) && nr_ioapics > 0)
+		return;
+
+	/*
+	 * If the IO/APIC is disabled via config, kernel command line or
+	 * lack of enumeration then all legacy interrupts are routed
+	 * through the PIC. Make sure that they are marked as legacy
+	 * vectors. PIC_CASCADE_IRQ has already been marked in
+	 * lapic_assign_system_vectors().
+	 */
+	for (i = 0; i < nr_legacy_irqs(); i++) {
+		if (i != PIC_CASCADE_IR)
+			lapic_assign_legacy_vector(i, true);
+	}
+}
+
 void __init lapic_assign_system_vectors(void)
 {
 	unsigned int i, vector = 0;
arch/x86/kernel/fpu/xstate.c: 0 additions, 57 deletions

@@ -1402,60 +1402,3 @@ int proc_pid_arch_status(struct seq_file *m, struct pid_namespace *ns,
 	return 0;
 }
 #endif /* CONFIG_PROC_PID_ARCH_STATUS */
-
-#ifdef CONFIG_IOMMU_SUPPORT
-void update_pasid(void)
-{
-	u64 pasid_state;
-	u32 pasid;
-
-	if (!cpu_feature_enabled(X86_FEATURE_ENQCMD))
-		return;
-
-	if (!current->mm)
-		return;
-
-	pasid = READ_ONCE(current->mm->pasid);
-	/* Set the valid bit in the PASID MSR/state only for valid pasid. */
-	pasid_state = pasid == PASID_DISABLED ?
-		      pasid : pasid | MSR_IA32_PASID_VALID;
-
-	/*
-	 * No need to hold fregs_lock() since the task's fpstate won't
-	 * be changed by others (e.g. ptrace) while the task is being
-	 * switched to or is in IPI.
-	 */
-	if (!test_thread_flag(TIF_NEED_FPU_LOAD)) {
-		/* The MSR is active and can be directly updated. */
-		wrmsrl(MSR_IA32_PASID, pasid_state);
-	} else {
-		struct fpu *fpu = &current->thread.fpu;
-		struct ia32_pasid_state *ppasid_state;
-		struct xregs_state *xsave;
-
-		/*
-		 * The CPU's xstate registers are not currently active. Just
-		 * update the PASID state in the memory buffer here. The
-		 * PASID MSR will be loaded when returning to user mode.
-		 */
-		xsave = &fpu->state.xsave;
-		xsave->header.xfeatures |= XFEATURE_MASK_PASID;
-		ppasid_state = get_xsave_addr(xsave, XFEATURE_PASID);
-		/*
-		 * Since XFEATURE_MASK_PASID is set in xfeatures, ppasid_state
-		 * won't be NULL and no need to check its value.
-		 *
-		 * Only update the task's PASID state when it's different
-		 * from the mm's pasid.
-		 */
-		if (ppasid_state->pasid != pasid_state) {
-			/*
-			 * Invalid fpregs so that state restoring will pick up
-			 * the PASID state.
-			 */
-			__fpu_invalidate_fpregs_state(fpu);
-			ppasid_state->pasid = pasid_state;
-		}
-	}
-}
-#endif /* CONFIG_IOMMU_SUPPORT */
arch/x86/kernel/setup.c: 30 additions, 14 deletions

@@ -44,6 +44,7 @@
 #include <asm/pci-direct.h>
 #include <asm/prom.h>
 #include <asm/proto.h>
+#include <asm/thermal.h>
 #include <asm/unwind.h>
 #include <asm/vsyscall.h>
 #include <linux/vmalloc.h>
@@ -637,11 +638,11 @@ static void __init trim_snb_memory(void)
 	 * them from accessing certain memory ranges, namely anything below
	 * 1M and in the pages listed in bad_pages[] above.
	 *
-	 * To avoid these pages being ever accessed by SNB gfx devices
-	 * reserve all memory below the 1 MB mark and bad_pages that have
-	 * not already been reserved at boot time.
+	 * To avoid these pages being ever accessed by SNB gfx devices reserve
+	 * bad_pages that have not already been reserved at boot time.
+	 * All memory below the 1 MB mark is anyway reserved later during
+	 * setup_arch(), so there is no need to reserve it here.
	 */
-	memblock_reserve(0, 1<<20);
 
	for (i = 0; i < ARRAY_SIZE(bad_pages); i++) {
		if (memblock_reserve(bad_pages[i], PAGE_SIZE))
@@ -733,14 +734,14 @@ static void __init early_reserve_memory(void)
	 * The first 4Kb of memory is a BIOS owned area, but generally it is
	 * not listed as such in the E820 table.
	 *
-	 * Reserve the first memory page and typically some additional
-	 * memory (64KiB by default) since some BIOSes are known to corrupt
-	 * low memory. See the Kconfig help text for X86_RESERVE_LOW.
+	 * Reserve the first 64K of memory since some BIOSes are known to
+	 * corrupt low memory. After the real mode trampoline is allocated the
+	 * rest of the memory below 640k is reserved.
	 *
	 * In addition, make sure page 0 is always reserved because on
	 * systems with L1TF its contents can be leaked to user processes.
	 */
-	memblock_reserve(0, ALIGN(reserve_low, PAGE_SIZE));
+	memblock_reserve(0, SZ_64K);
 
	early_reserve_initrd();
@@ -751,6 +752,7 @@
 
	reserve_ibft_region();
	reserve_bios_regions();
+	trim_snb_memory();
 }
@@ -1081,14 +1083,20 @@ void __init setup_arch(char **cmdline_p)
			(max_pfn_mapped<<PAGE_SHIFT) - 1);
 #endif
 
-	reserve_real_mode();
-
	/*
-	 * Reserving memory causing GPU hangs on Sandy Bridge integrated
-	 * graphics devices should be done after we allocated memory under
-	 * 1M for the real mode trampoline.
+	 * Find free memory for the real mode trampoline and place it
+	 * there.
+	 * If there is not enough free memory under 1M, on EFI-enabled
+	 * systems there will be an additional attempt to reclaim the memory
+	 * for the real mode trampoline at efi_free_boot_services().
+	 *
+	 * Unconditionally reserve the entire first 1M of RAM because
+	 * BIOSes are known to corrupt low memory and several
+	 * hundred kilobytes are not worth complex detection of what memory
+	 * gets clobbered. Moreover, on machines with SandyBridge graphics or
+	 * in setups that use crashkernel the entire 1M is reserved anyway.
	 */
-	trim_snb_memory();
+	reserve_real_mode();
 
	init_mem_mapping();
@@ -1226,6 +1234,14 @@ void __init setup_arch(char **cmdline_p)
	x86_init.timers.wallclock_init();
 
+	/*
+	 * This needs to run before setup_local_APIC() which soft-disables the
+	 * local APIC temporarily and that masks the thermal LVT interrupt,
+	 * leading to softlockups on machines which have configured SMI
+	 * interrupt delivery.
+	 */
+	therm_lvt_init();
+
	mcheck_init();
 
	register_refined_jiffies(CLOCK_TICK_RATE);
arch/x86/mm/fault.c: 2 additions, 2 deletions

@@ -836,8 +836,8 @@ __bad_area_nosemaphore(struct pt_regs *regs, unsigned long error_code,
 
 	if (si_code == SEGV_PKUERR)
 		force_sig_pkuerr((void __user *)address, pkey);
-
-	force_sig_fault(SIGSEGV, si_code, (void __user *)address);
+	else
+		force_sig_fault(SIGSEGV, si_code, (void __user *)address);
 
 	local_irq_disable();
 }
arch/x86/mm/mem_encrypt_identity.c: 6 additions, 5 deletions

@@ -504,10 +504,6 @@ void __init sme_enable(struct boot_params *bp)
 #define AMD_SME_BIT	BIT(0)
 #define AMD_SEV_BIT	BIT(1)
 
-	/* Check the SEV MSR whether SEV or SME is enabled */
-	sev_status   = __rdmsr(MSR_AMD64_SEV);
-	feature_mask = (sev_status & MSR_AMD64_SEV_ENABLED) ? AMD_SEV_BIT : AMD_SME_BIT;
-
 	/*
 	 * Check for the SME/SEV feature:
 	 * CPUID Fn8000_001F[EAX]
@@ -519,11 +515,16 @@ void __init sme_enable(struct boot_params *bp)
 	eax = 0x8000001f;
 	ecx = 0;
 	native_cpuid(&eax, &ebx, &ecx, &edx);
-	if (!(eax & feature_mask))
+	/* Check whether SEV or SME is supported */
+	if (!(eax & (AMD_SEV_BIT | AMD_SME_BIT)))
 		return;
 
 	me_mask = 1UL << (ebx & 0x3f);
 
+	/* Check the SEV MSR whether SEV or SME is enabled */
+	sev_status   = __rdmsr(MSR_AMD64_SEV);
+	feature_mask = (sev_status & MSR_AMD64_SEV_ENABLED) ? AMD_SEV_BIT : AMD_SME_BIT;
+
 	/* Check if memory encryption is enabled */
 	if (feature_mask == AMD_SME_BIT) {
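Why the reordering matters: sme_enable() runs very early in boot, and __rdmsr() has no exception handling, so reading MSR_AMD64_SEV on hardware that does not implement it (1st gen Hygon) can raise a fatal #GP. Checking the CPUID bits first means the MSR is never touched on such CPUs. A rough userspace illustration of that ordering follows; MSR reads need ring 0, so this sketch only prints the decision (leaf and bit layout per the AMD APM):

#include <stdio.h>
#include <cpuid.h>          /* GCC/clang helper for the CPUID instruction */

int main(void)
{
        unsigned int eax, ebx, ecx, edx;

        /* __get_cpuid() returns 0 if the leaf is out of range for this CPU. */
        if (!__get_cpuid(0x8000001f, &eax, &ebx, &ecx, &edx)) {
                puts("leaf 0x8000001F not available");
                return 0;
        }

        if (!(eax & 0x3)) { /* bit 0: SME supported, bit 1: SEV supported */
                puts("no SME/SEV: never touch MSR_AMD64_SEV");
                return 0;
        }

        /* Only at this point would kernel code go on to read MSR_AMD64_SEV. */
        printf("SME/SEV supported, C-bit position: %u\n", ebx & 0x3f);
        return 0;
}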
arch/x86/platform/efi/quirks.c: 12 additions, 0 deletions

@@ -450,6 +450,18 @@ void __init efi_free_boot_services(void)
 			size -= rm_size;
 		}
 
+		/*
+		 * Don't free memory under 1M for two reasons:
+		 * - BIOS might clobber it
+		 * - Crash kernel needs it to be reserved
+		 */
+		if (start + size < SZ_1M)
+			continue;
+		if (start < SZ_1M) {
+			size -= (SZ_1M - start);
+			start = SZ_1M;
+		}
+
 		memblock_free_late(start, size);
 	}
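A standalone check of the clamp arithmetic added above, with made-up region values (SZ_1M is 0x100000): a region ending below 1M is skipped and stays reserved, while one straddling the boundary is trimmed so only the part at or above 1M is freed:

#include <stdio.h>

#define SZ_1M 0x100000ul

int main(void)
{
        /* Hypothetical boot-services region: 512K..2.5M */
        unsigned long start = 0x80000, size = 0x200000;

        if (start + size < SZ_1M)
                return 0;                /* entirely below 1M: keep reserved */
        if (start < SZ_1M) {
                size -= (SZ_1M - start); /* drop the part below 1M */
                start = SZ_1M;
        }
        /* prints: free [0x100000, 0x280000) */
        printf("free [%#lx, %#lx)\n", start, start + size);
        return 0;
}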