diff --git a/Documentation/DocBook/media/v4l/media-types.xml b/Documentation/DocBook/media/v4l/media-types.xml
index 1af3842509105..0ee0f3386cdf3 100644
--- a/Documentation/DocBook/media/v4l/media-types.xml
+++ b/Documentation/DocBook/media/v4l/media-types.xml
@@ -56,10 +56,6 @@
MEDIA_ENT_F_CONN_COMPOSITE
Connector for a RGB composite signal.
-
- MEDIA_ENT_F_CONN_TEST
- Connector for a test generator.
-
MEDIA_ENT_F_CAM_SENSOR
Camera video sensor entity.
diff --git a/Documentation/devicetree/bindings/arm/omap/omap.txt b/Documentation/devicetree/bindings/arm/omap/omap.txt
index a2bd593881cab..66422d6631841 100644
--- a/Documentation/devicetree/bindings/arm/omap/omap.txt
+++ b/Documentation/devicetree/bindings/arm/omap/omap.txt
@@ -23,6 +23,7 @@ Optional properties:
during suspend.
- ti,no-reset-on-init: When present, the module should not be reset at init
- ti,no-idle-on-init: When present, the module should not be idled at init
+- ti,no-idle: When present, the module is never allowed to idle.
Example:
diff --git a/Documentation/devicetree/bindings/interrupt-controller/al,alpine-msix.txt b/Documentation/devicetree/bindings/interrupt-controller/al,alpine-msix.txt
new file mode 100644
index 0000000000000..f6f1c14bf99bd
--- /dev/null
+++ b/Documentation/devicetree/bindings/interrupt-controller/al,alpine-msix.txt
@@ -0,0 +1,26 @@
+Alpine MSIX controller
+
+See arm,gic-v3.txt for SPI and MSI definitions.
+
+Required properties:
+
+- compatible: should be "al,alpine-msix"
+- reg: physical base address and size of the registers
+- interrupt-parent: specifies the parent interrupt controller.
+- interrupt-controller: identifies the node as an interrupt controller
+- msi-controller: identifies the node as an PCI Message Signaled Interrupt
+ controller
+- al,msi-base-spi: SPI base of the MSI frame
+- al,msi-num-spis: number of SPIs assigned to the MSI frame, relative to SPI0
+
+Example:
+
+msix: msix {
+ compatible = "al,alpine-msix";
+ reg = <0x0 0xfbe00000 0x0 0x100000>;
+ interrupt-parent = <&gic>;
+ interrupt-controller;
+ msi-controller;
+ al,msi-base-spi = <160>;
+ al,msi-num-spis = <160>;
+};
diff --git a/Documentation/devicetree/bindings/interrupt-controller/arm,gic.txt b/Documentation/devicetree/bindings/interrupt-controller/arm,gic.txt
index 5a1cb4bc3dfe8..793c20ff8fccc 100644
--- a/Documentation/devicetree/bindings/interrupt-controller/arm,gic.txt
+++ b/Documentation/devicetree/bindings/interrupt-controller/arm,gic.txt
@@ -16,6 +16,7 @@ Main node required properties:
"arm,cortex-a15-gic"
"arm,cortex-a7-gic"
"arm,cortex-a9-gic"
+ "arm,eb11mp-gic"
"arm,gic-400"
"arm,pl390"
"arm,tc11mp-gic"
diff --git a/Documentation/devicetree/bindings/interrupt-controller/marvell,odmi-controller.txt b/Documentation/devicetree/bindings/interrupt-controller/marvell,odmi-controller.txt
new file mode 100644
index 0000000000000..8af0a8e613abe
--- /dev/null
+++ b/Documentation/devicetree/bindings/interrupt-controller/marvell,odmi-controller.txt
@@ -0,0 +1,44 @@
+
+* Marvell ODMI for MSI support
+
+Some Marvell SoCs have an On-Die Message Interrupt (ODMI) controller
+which can be used by on-board peripheral for MSI interrupts.
+
+Required properties:
+
+- compatible : The value here should contain:
+
+ "marvell,ap806-odmi-controller", "marvell,odmi-controller".
+
+- interrupt,controller : Identifies the node as an interrupt controller.
+
+- msi-controller : Identifies the node as an MSI controller.
+
+- marvell,odmi-frames : Number of ODMI frames available. Each frame
+ provides a number of events.
+
+- reg : List of register definitions, one for each
+ ODMI frame.
+
+- marvell,spi-base : List of GIC base SPI interrupts, one for each
+ ODMI frame. Those SPI interrupts are 0-based,
+ i.e marvell,spi-base = <128> will use SPI #96.
+ See Documentation/devicetree/bindings/interrupt-controller/arm,gic.txt
+ for details about the GIC Device Tree binding.
+
+- interrupt-parent : Reference to the parent interrupt controller.
+
+Example:
+
+ odmi: odmi@300000 {
+ compatible = "marvell,ap806-odm-controller",
+ "marvell,odmi-controller";
+ interrupt-controller;
+ msi-controller;
+ marvell,odmi-frames = <4>;
+ reg = <0x300000 0x4000>,
+ <0x304000 0x4000>,
+ <0x308000 0x4000>,
+ <0x30C000 0x4000>;
+ marvell,spi-base = <128>, <136>, <144>, <152>;
+ };
diff --git a/Documentation/devicetree/bindings/interrupt-controller/mips-gic.txt b/Documentation/devicetree/bindings/interrupt-controller/mips-gic.txt
index aae4c384ee1f2..173595305e266 100644
--- a/Documentation/devicetree/bindings/interrupt-controller/mips-gic.txt
+++ b/Documentation/devicetree/bindings/interrupt-controller/mips-gic.txt
@@ -23,6 +23,12 @@ Optional properties:
- mti,reserved-cpu-vectors : Specifies the list of CPU interrupt vectors
to which the GIC may not route interrupts. Valid values are 2 - 7.
This property is ignored if the CPU is started in EIC mode.
+- mti,reserved-ipi-vectors : Specifies the range of GIC interrupts that are
+ reserved for IPIs.
+ It accepts 2 values, the 1st is the starting interrupt and the 2nd is the size
+ of the reserved range.
+ If not specified, the driver will allocate the last 2 * number of VPEs in the
+ system.
Required properties for timer sub-node:
- compatible : Should be "mti,gic-timer".
@@ -44,6 +50,7 @@ Example:
#interrupt-cells = <3>;
mti,reserved-cpu-vectors = <7>;
+ mti,reserved-ipi-vectors = <40 8>;
timer {
compatible = "mti,gic-timer";
diff --git a/Documentation/devicetree/bindings/interrupt-controller/sigma,smp8642-intc.txt b/Documentation/devicetree/bindings/interrupt-controller/sigma,smp8642-intc.txt
new file mode 100644
index 0000000000000..1f441fa0ad401
--- /dev/null
+++ b/Documentation/devicetree/bindings/interrupt-controller/sigma,smp8642-intc.txt
@@ -0,0 +1,49 @@
+Sigma Designs SMP86xx/SMP87xx secondary interrupt controller
+
+Required properties:
+- compatible: should be "sigma,smp8642-intc"
+- reg: physical address of MMIO region
+- ranges: address space mapping of child nodes
+- interrupt-parent: phandle of parent interrupt controller
+- interrupt-controller: boolean
+- #address-cells: should be <1>
+- #size-cells: should be <1>
+
+One child node per control block with properties:
+- reg: address of registers for this control block
+- interrupt-controller: boolean
+- #interrupt-cells: should be <2>, interrupt index and flags per interrupts.txt
+- interrupts: interrupt spec of primary interrupt controller
+
+Example:
+
+interrupt-controller@6e000 {
+ compatible = "sigma,smp8642-intc";
+ reg = <0x6e000 0x400>;
+ ranges = <0x0 0x6e000 0x400>;
+ interrupt-parent = <&gic>;
+ interrupt-controller;
+ #address-cells = <1>;
+ #size-cells = <1>;
+
+ irq0: interrupt-controller@0 {
+ reg = <0x000 0x100>;
+ interrupt-controller;
+ #interrupt-cells = <2>;
+ interrupts = ;
+ };
+
+ irq1: interrupt-controller@100 {
+ reg = <0x100 0x100>;
+ interrupt-controller;
+ #interrupt-cells = <2>;
+ interrupts = ;
+ };
+
+ irq2: interrupt-controller@300 {
+ reg = <0x300 0x100>;
+ interrupt-controller;
+ #interrupt-cells = <2>;
+ interrupts = ;
+ };
+};
diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
index 9a53c929f017d..4d9ca7d92a20a 100644
--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt
@@ -666,7 +666,7 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
clearcpuid=BITNUM [X86]
Disable CPUID feature X for the kernel. See
- arch/x86/include/asm/cpufeature.h for the valid bit
+ arch/x86/include/asm/cpufeatures.h for the valid bit
numbers. Note the Linux specific bits are not necessarily
stable over kernel options, but the vendor specific
ones should be.
@@ -1687,6 +1687,15 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
ip= [IP_PNP]
See Documentation/filesystems/nfs/nfsroot.txt.
+ irqaffinity= [SMP] Set the default irq affinity mask
+ Format:
+ ,...,
+ or
+ -
+ (must be a positive range in ascending order)
+ or a mixture
+ ,...,-
+
irqfixup [HW]
When an interrupt is not handled search all handlers
for it. Intended to get systems with badly broken
@@ -2566,6 +2575,8 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
nointroute [IA-64]
+ noinvpcid [X86] Disable the INVPCID cpu feature.
+
nojitter [IA-64] Disables jitter checking for ITC timers.
no-kvmclock [X86,KVM] Disable paravirtualized KVM clock driver
@@ -3491,6 +3502,10 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
ro [KNL] Mount root device read-only on boot
+ rodata= [KNL]
+ on Mark read-only kernel memory as read-only (default).
+ off Leave read-only kernel memory writable for debugging.
+
root= [KNL] Root filesystem
See name_to_dev_t comment in init/do_mounts.c.
@@ -3528,6 +3543,11 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
sched_debug [KNL] Enables verbose scheduler debug messages.
+ schedstats= [KNL,X86] Enable or disable scheduled statistics.
+ Allowed values are enable and disable. This feature
+ incurs a small amount of overhead in the scheduler
+ but is useful for debugging and performance tuning.
+
skew_tick= [KNL] Offset the periodic timer tick per cpu to mitigate
xtime_lock contention on larger systems, and/or RCU lock
contention on all systems with CONFIG_MAXSMP set.
diff --git a/Documentation/ptp/testptp.c b/Documentation/ptp/testptp.c
index 6c6247aaa7b93..d99012f41602a 100644
--- a/Documentation/ptp/testptp.c
+++ b/Documentation/ptp/testptp.c
@@ -277,13 +277,15 @@ int main(int argc, char *argv[])
" %d external time stamp channels\n"
" %d programmable periodic signals\n"
" %d pulse per second\n"
- " %d programmable pins\n",
+ " %d programmable pins\n"
+ " %d cross timestamping\n",
caps.max_adj,
caps.n_alarm,
caps.n_ext_ts,
caps.n_per_out,
caps.pps,
- caps.n_pins);
+ caps.n_pins,
+ caps.cross_timestamping);
}
}
diff --git a/Documentation/sysctl/kernel.txt b/Documentation/sysctl/kernel.txt
index a93b414672a71..f4444c94ff285 100644
--- a/Documentation/sysctl/kernel.txt
+++ b/Documentation/sysctl/kernel.txt
@@ -58,6 +58,8 @@ show up in /proc/sys/kernel:
- panic_on_stackoverflow
- panic_on_unrecovered_nmi
- panic_on_warn
+- perf_cpu_time_max_percent
+- perf_event_paranoid
- pid_max
- powersave-nap [ PPC only ]
- printk
@@ -639,6 +641,17 @@ allowed to execute.
==============================================================
+perf_event_paranoid:
+
+Controls use of the performance events system by unprivileged
+users (without CAP_SYS_ADMIN). The default value is 1.
+
+ -1: Allow use of (almost) all events by all users
+>=0: Disallow raw tracepoint access by users without CAP_IOC_LOCK
+>=1: Disallow CPU event access by users without CAP_SYS_ADMIN
+>=2: Disallow kernel profiling by users without CAP_SYS_ADMIN
+
+==============================================================
pid_max:
@@ -760,6 +773,14 @@ rtsig-nr shows the number of RT signals currently queued.
==============================================================
+sched_schedstats:
+
+Enables/disables scheduler statistics. Enabling this feature
+incurs a small amount of overhead in the scheduler but is
+useful for debugging and performance tuning.
+
+==============================================================
+
sg-big-buff:
This file shows the size of the generic SCSI (sg) buffer.
diff --git a/Documentation/virtual/kvm/mmu.txt b/Documentation/virtual/kvm/mmu.txt
index daf9c0f742d22..c81731096a433 100644
--- a/Documentation/virtual/kvm/mmu.txt
+++ b/Documentation/virtual/kvm/mmu.txt
@@ -358,7 +358,8 @@ In the first case there are two additional complications:
- if CR4.SMEP is enabled: since we've turned the page into a kernel page,
the kernel may now execute it. We handle this by also setting spte.nx.
If we get a user fetch or read fault, we'll change spte.u=1 and
- spte.nx=gpte.nx back.
+ spte.nx=gpte.nx back. For this to work, KVM forces EFER.NX to 1 when
+ shadow paging is in use.
- if CR4.SMAP is disabled: since the page has been changed to a kernel
page, it can not be reused when CR4.SMAP is enabled. We set
CR4.SMAP && !CR0.WP into shadow page's role to avoid this case. Note,
diff --git a/Documentation/watchdog/watchdog-parameters.txt b/Documentation/watchdog/watchdog-parameters.txt
index 9f9ec9f760394..4e4b6f10d8410 100644
--- a/Documentation/watchdog/watchdog-parameters.txt
+++ b/Documentation/watchdog/watchdog-parameters.txt
@@ -400,3 +400,7 @@ wm8350_wdt:
nowayout: Watchdog cannot be stopped once started
(default=kernel config parameter)
-------------------------------------------------
+sun4v_wdt:
+timeout_ms: Watchdog timeout in milliseconds 1..180000, default=60000)
+nowayout: Watchdog cannot be stopped once started
+-------------------------------------------------
diff --git a/Documentation/x86/early-microcode.txt b/Documentation/x86/early-microcode.txt
index d62bea6796dad..c956d99cf1de4 100644
--- a/Documentation/x86/early-microcode.txt
+++ b/Documentation/x86/early-microcode.txt
@@ -40,3 +40,28 @@ cp ../microcode.bin kernel/x86/microcode/GenuineIntel.bin (or AuthenticAMD.bin)
find . | cpio -o -H newc >../ucode.cpio
cd ..
cat ucode.cpio /boot/initrd-3.5.0.img >/boot/initrd-3.5.0.ucode.img
+
+Builtin microcode
+=================
+
+We can also load builtin microcode supplied through the regular firmware
+builtin method CONFIG_FIRMWARE_IN_KERNEL. Here's an example:
+
+CONFIG_FIRMWARE_IN_KERNEL=y
+CONFIG_EXTRA_FIRMWARE="intel-ucode/06-3a-09 amd-ucode/microcode_amd_fam15h.bin"
+CONFIG_EXTRA_FIRMWARE_DIR="/lib/firmware"
+
+This basically means, you have the following tree structure locally:
+
+/lib/firmware/
+|-- amd-ucode
+...
+| |-- microcode_amd_fam15h.bin
+...
+|-- intel-ucode
+...
+| |-- 06-3a-09
+...
+
+so that the build system can find those files and integrate them into
+the final kernel image. The early loader finds them and applies them.
diff --git a/Documentation/x86/exception-tables.txt b/Documentation/x86/exception-tables.txt
index 32901aa36f0a9..e396bcd8d8304 100644
--- a/Documentation/x86/exception-tables.txt
+++ b/Documentation/x86/exception-tables.txt
@@ -290,3 +290,38 @@ Due to the way that the exception table is built and needs to be ordered,
only use exceptions for code in the .text section. Any other section
will cause the exception table to not be sorted correctly, and the
exceptions will fail.
+
+Things changed when 64-bit support was added to x86 Linux. Rather than
+double the size of the exception table by expanding the two entries
+from 32-bits to 64 bits, a clever trick was used to store addresses
+as relative offsets from the table itself. The assembly code changed
+from:
+ .long 1b,3b
+to:
+ .long (from) - .
+ .long (to) - .
+
+and the C-code that uses these values converts back to absolute addresses
+like this:
+
+ ex_insn_addr(const struct exception_table_entry *x)
+ {
+ return (unsigned long)&x->insn + x->insn;
+ }
+
+In v4.6 the exception table entry was expanded with a new field "handler".
+This is also 32-bits wide and contains a third relative function
+pointer which points to one of:
+
+1) int ex_handler_default(const struct exception_table_entry *fixup)
+ This is legacy case that just jumps to the fixup code
+2) int ex_handler_fault(const struct exception_table_entry *fixup)
+ This case provides the fault number of the trap that occurred at
+ entry->insn. It is used to distinguish page faults from machine
+ check.
+3) int ex_handler_ext(const struct exception_table_entry *fixup)
+ This case is used for uaccess_err ... we need to set a flag
+ in the task structure. Before the handler functions existed this
+ case was handled by adding a large offset to the fixup to tag
+ it as special.
+More functions can easily be added.
diff --git a/Documentation/x86/x86_64/boot-options.txt b/Documentation/x86/x86_64/boot-options.txt
index 68ed3114c363b..0965a71f99424 100644
--- a/Documentation/x86/x86_64/boot-options.txt
+++ b/Documentation/x86/x86_64/boot-options.txt
@@ -60,6 +60,8 @@ Machine check
threshold to 1. Enabling this may make memory predictive failure
analysis less effective if the bios sets thresholds for memory
errors since we will not see details for all errors.
+ mce=recovery
+ Force-enable recoverable machine check code paths
nomce (for compatibility with i386): same as mce=off
diff --git a/MAINTAINERS b/MAINTAINERS
index da3e4d8016d0f..57adf395a61f9 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -2422,6 +2422,7 @@ F: arch/mips/bmips/*
F: arch/mips/include/asm/mach-bmips/*
F: arch/mips/kernel/*bmips*
F: arch/mips/boot/dts/brcm/bcm*.dts*
+F: drivers/irqchip/irq-bcm63*
F: drivers/irqchip/irq-bcm7*
F: drivers/irqchip/irq-brcmstb*
F: include/linux/bcm963xx_nvram.h
@@ -4518,6 +4519,12 @@ L: linuxppc-dev@lists.ozlabs.org
S: Maintained
F: drivers/dma/fsldma.*
+FREESCALE GPMI NAND DRIVER
+M: Han Xu
+L: linux-mtd@lists.infradead.org
+S: Maintained
+F: drivers/mtd/nand/gpmi-nand/*
+
FREESCALE I2C CPM DRIVER
M: Jochen Friedrich
L: linuxppc-dev@lists.ozlabs.org
@@ -4534,7 +4541,7 @@ F: include/linux/platform_data/video-imxfb.h
F: drivers/video/fbdev/imxfb.c
FREESCALE QUAD SPI DRIVER
-M: Han Xu
+M: Han Xu
L: linux-mtd@lists.infradead.org
S: Maintained
F: drivers/mtd/spi-nor/fsl-quadspi.c
@@ -4548,6 +4555,15 @@ S: Maintained
F: drivers/net/ethernet/freescale/fs_enet/
F: include/linux/fs_enet_pd.h
+FREESCALE IMX / MXC FEC DRIVER
+M: Fugang Duan
+L: netdev@vger.kernel.org
+S: Maintained
+F: drivers/net/ethernet/freescale/fec_main.c
+F: drivers/net/ethernet/freescale/fec_ptp.c
+F: drivers/net/ethernet/freescale/fec.h
+F: Documentation/devicetree/bindings/net/fsl-fec.txt
+
FREESCALE QUICC ENGINE LIBRARY
L: linuxppc-dev@lists.ozlabs.org
S: Orphan
@@ -6764,6 +6780,7 @@ S: Maintained
F: Documentation/networking/mac80211-injection.txt
F: include/net/mac80211.h
F: net/mac80211/
+F: drivers/net/wireless/mac80211_hwsim.[ch]
MACVLAN DRIVER
M: Patrick McHardy
@@ -7383,6 +7400,17 @@ W: https://www.myricom.com/support/downloads/myri10ge.html
S: Supported
F: drivers/net/ethernet/myricom/myri10ge/
+NAND FLASH SUBSYSTEM
+M: Boris Brezillon
+R: Richard Weinberger
+L: linux-mtd@lists.infradead.org
+W: http://www.linux-mtd.infradead.org/
+Q: http://patchwork.ozlabs.org/project/linux-mtd/list/
+T: git git://github.com/linux-nand/linux.git
+S: Maintained
+F: drivers/mtd/nand/
+F: include/linux/mtd/nand*.h
+
NATSEMI ETHERNET DRIVER (DP8381x)
S: Orphan
F: drivers/net/ethernet/natsemi/natsemi.c
@@ -8448,6 +8476,7 @@ PERFORMANCE EVENTS SUBSYSTEM
M: Peter Zijlstra
M: Ingo Molnar
M: Arnaldo Carvalho de Melo
+R: Alexander Shishkin
L: linux-kernel@vger.kernel.org
T: git git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git perf/core
S: Supported
diff --git a/Makefile b/Makefile
index af6e5f893d56e..7b3ecdcdc6c18 100644
--- a/Makefile
+++ b/Makefile
@@ -1,7 +1,7 @@
VERSION = 4
PATCHLEVEL = 5
SUBLEVEL = 0
-EXTRAVERSION = -rc6
+EXTRAVERSION =
NAME = Blurry Fish Butt
# *DOCUMENTATION*
diff --git a/arch/alpha/kernel/smp.c b/arch/alpha/kernel/smp.c
index 2f24447fef920..46bf263c31531 100644
--- a/arch/alpha/kernel/smp.c
+++ b/arch/alpha/kernel/smp.c
@@ -168,7 +168,7 @@ smp_callin(void)
cpuid, current, current->active_mm));
preempt_disable();
- cpu_startup_entry(CPUHP_ONLINE);
+ cpu_startup_entry(CPUHP_AP_ONLINE_IDLE);
}
/* Wait until hwrpb->txrdy is clear for cpu. Return -1 on timeout. */
diff --git a/arch/arc/kernel/smp.c b/arch/arc/kernel/smp.c
index 424e937da5c89..4cb3add77c75c 100644
--- a/arch/arc/kernel/smp.c
+++ b/arch/arc/kernel/smp.c
@@ -142,7 +142,7 @@ void start_kernel_secondary(void)
local_irq_enable();
preempt_disable();
- cpu_startup_entry(CPUHP_ONLINE);
+ cpu_startup_entry(CPUHP_AP_ONLINE_IDLE);
}
/*
diff --git a/arch/arm/boot/compressed/Makefile b/arch/arm/boot/compressed/Makefile
index 7a6a58ef8aaf8..43788b1a1ac5e 100644
--- a/arch/arm/boot/compressed/Makefile
+++ b/arch/arm/boot/compressed/Makefile
@@ -195,5 +195,7 @@ CFLAGS_font.o := -Dstatic=
$(obj)/font.c: $(FONTC)
$(call cmd,shipped)
+AFLAGS_hyp-stub.o := -Wa,-march=armv7-a
+
$(obj)/hyp-stub.S: $(srctree)/arch/$(SRCARCH)/kernel/hyp-stub.S
$(call cmd,shipped)
diff --git a/arch/arm/boot/dts/armada-xp-axpwifiap.dts b/arch/arm/boot/dts/armada-xp-axpwifiap.dts
index 23fc670c04277..5c21b236721fc 100644
--- a/arch/arm/boot/dts/armada-xp-axpwifiap.dts
+++ b/arch/arm/boot/dts/armada-xp-axpwifiap.dts
@@ -70,8 +70,8 @@
soc {
ranges = ;
+ MBUS_ID(0x09, 0x09) 0 0 0xf1100000 0x10000
+ MBUS_ID(0x09, 0x05) 0 0 0xf1110000 0x10000>;
pcie-controller {
status = "okay";
diff --git a/arch/arm/boot/dts/armada-xp-db.dts b/arch/arm/boot/dts/armada-xp-db.dts
index f774101416a55..ebe1d267406df 100644
--- a/arch/arm/boot/dts/armada-xp-db.dts
+++ b/arch/arm/boot/dts/armada-xp-db.dts
@@ -76,8 +76,8 @@
ranges = ;
+ MBUS_ID(0x09, 0x09) 0 0 0xf1100000 0x10000
+ MBUS_ID(0x09, 0x05) 0 0 0xf1110000 0x10000>;
devbus-bootcs {
status = "okay";
diff --git a/arch/arm/boot/dts/armada-xp-gp.dts b/arch/arm/boot/dts/armada-xp-gp.dts
index 4878d7353069f..5730b875c4f51 100644
--- a/arch/arm/boot/dts/armada-xp-gp.dts
+++ b/arch/arm/boot/dts/armada-xp-gp.dts
@@ -95,8 +95,8 @@
ranges = ;
+ MBUS_ID(0x09, 0x09) 0 0 0xf1100000 0x10000
+ MBUS_ID(0x09, 0x05) 0 0 0xf1110000 0x10000>;
devbus-bootcs {
status = "okay";
diff --git a/arch/arm/boot/dts/armada-xp-lenovo-ix4-300d.dts b/arch/arm/boot/dts/armada-xp-lenovo-ix4-300d.dts
index fb9e1bbf23385..8af463f26ea1e 100644
--- a/arch/arm/boot/dts/armada-xp-lenovo-ix4-300d.dts
+++ b/arch/arm/boot/dts/armada-xp-lenovo-ix4-300d.dts
@@ -65,8 +65,8 @@
soc {
ranges = ;
+ MBUS_ID(0x09, 0x09) 0 0 0xf1100000 0x10000
+ MBUS_ID(0x09, 0x05) 0 0 0xf1110000 0x10000>;
pcie-controller {
status = "okay";
diff --git a/arch/arm/boot/dts/armada-xp-linksys-mamba.dts b/arch/arm/boot/dts/armada-xp-linksys-mamba.dts
index 6e9820e141f8d..b89e6cf1271a1 100644
--- a/arch/arm/boot/dts/armada-xp-linksys-mamba.dts
+++ b/arch/arm/boot/dts/armada-xp-linksys-mamba.dts
@@ -70,8 +70,8 @@
soc {
ranges = ;
+ MBUS_ID(0x09, 0x09) 0 0 0xf1100000 0x10000
+ MBUS_ID(0x09, 0x05) 0 0 0xf1110000 0x10000>;
pcie-controller {
status = "okay";
diff --git a/arch/arm/boot/dts/armada-xp-matrix.dts b/arch/arm/boot/dts/armada-xp-matrix.dts
index 6ab33837a2b6d..6522b04f4a8e7 100644
--- a/arch/arm/boot/dts/armada-xp-matrix.dts
+++ b/arch/arm/boot/dts/armada-xp-matrix.dts
@@ -68,8 +68,8 @@
soc {
ranges = ;
+ MBUS_ID(0x09, 0x09) 0 0 0xf1100000 0x10000
+ MBUS_ID(0x09, 0x05) 0 0 0xf1110000 0x10000>;
internal-regs {
serial@12000 {
diff --git a/arch/arm/boot/dts/armada-xp-netgear-rn2120.dts b/arch/arm/boot/dts/armada-xp-netgear-rn2120.dts
index 62175a8848bc2..d19f44c709257 100644
--- a/arch/arm/boot/dts/armada-xp-netgear-rn2120.dts
+++ b/arch/arm/boot/dts/armada-xp-netgear-rn2120.dts
@@ -64,8 +64,8 @@
soc {
ranges = ;
+ MBUS_ID(0x09, 0x09) 0 0 0xf1100000 0x10000
+ MBUS_ID(0x09, 0x05) 0 0 0xf1110000 0x10000>;
pcie-controller {
status = "okay";
diff --git a/arch/arm/boot/dts/armada-xp-openblocks-ax3-4.dts b/arch/arm/boot/dts/armada-xp-openblocks-ax3-4.dts
index a5db17782e085..853bd392a4fe2 100644
--- a/arch/arm/boot/dts/armada-xp-openblocks-ax3-4.dts
+++ b/arch/arm/boot/dts/armada-xp-openblocks-ax3-4.dts
@@ -65,9 +65,9 @@
soc {
ranges = ;
+ MBUS_ID(0x01, 0x2f) 0 0 0xe8000000 0x8000000
+ MBUS_ID(0x09, 0x09) 0 0 0xf1100000 0x10000
+ MBUS_ID(0x09, 0x05) 0 0 0xf1110000 0x10000>;
devbus-bootcs {
status = "okay";
diff --git a/arch/arm/boot/dts/armada-xp-synology-ds414.dts b/arch/arm/boot/dts/armada-xp-synology-ds414.dts
index 2391b11dc546b..d17dab0a6f513 100644
--- a/arch/arm/boot/dts/armada-xp-synology-ds414.dts
+++ b/arch/arm/boot/dts/armada-xp-synology-ds414.dts
@@ -78,8 +78,8 @@
soc {
ranges = ;
+ MBUS_ID(0x09, 0x09) 0 0 0xf1100000 0x10000
+ MBUS_ID(0x09, 0x05) 0 0 0xf1110000 0x10000>;
pcie-controller {
status = "okay";
diff --git a/arch/arm/boot/dts/dra7.dtsi b/arch/arm/boot/dts/dra7.dtsi
index c4d9175b90dce..f82aa44c3ceed 100644
--- a/arch/arm/boot/dts/dra7.dtsi
+++ b/arch/arm/boot/dts/dra7.dtsi
@@ -1500,6 +1500,16 @@
0x48485200 0x2E00>;
#address-cells = <1>;
#size-cells = <1>;
+
+ /*
+ * Do not allow gating of cpsw clock as workaround
+ * for errata i877. Keeping internal clock disabled
+ * causes the device switching characteristics
+ * to degrade over time and eventually fail to meet
+ * the data manual delay time/skew specs.
+ */
+ ti,no-idle;
+
/*
* rx_thresh_pend
* rx_pend
diff --git a/arch/arm/boot/dts/r8a7791-porter.dts b/arch/arm/boot/dts/r8a7791-porter.dts
index 6713b1ea732b0..01d239c3eaaaa 100644
--- a/arch/arm/boot/dts/r8a7791-porter.dts
+++ b/arch/arm/boot/dts/r8a7791-porter.dts
@@ -283,7 +283,6 @@
pinctrl-names = "default";
status = "okay";
- renesas,enable-gpio = <&gpio5 31 GPIO_ACTIVE_HIGH>;
};
&usbphy {
diff --git a/arch/arm/include/asm/cacheflush.h b/arch/arm/include/asm/cacheflush.h
index d5525bfc7e3e6..9156fc303afd8 100644
--- a/arch/arm/include/asm/cacheflush.h
+++ b/arch/arm/include/asm/cacheflush.h
@@ -491,7 +491,6 @@ static inline int set_memory_nx(unsigned long addr, int numpages) { return 0; }
#endif
#ifdef CONFIG_DEBUG_RODATA
-void mark_rodata_ro(void);
void set_kernel_text_rw(void);
void set_kernel_text_ro(void);
#else
diff --git a/arch/arm/kernel/Makefile b/arch/arm/kernel/Makefile
index 2c5f160be65e4..ad325a8c7e1e5 100644
--- a/arch/arm/kernel/Makefile
+++ b/arch/arm/kernel/Makefile
@@ -88,6 +88,7 @@ obj-$(CONFIG_DEBUG_LL) += debug.o
obj-$(CONFIG_EARLY_PRINTK) += early_printk.o
obj-$(CONFIG_ARM_VIRT_EXT) += hyp-stub.o
+AFLAGS_hyp-stub.o :=-Wa,-march=armv7-a
ifeq ($(CONFIG_ARM_PSCI),y)
obj-$(CONFIG_SMP) += psci_smp.o
endif
diff --git a/arch/arm/kernel/setup.c b/arch/arm/kernel/setup.c
index 7d0cba6f1cc5e..139791ed473d5 100644
--- a/arch/arm/kernel/setup.c
+++ b/arch/arm/kernel/setup.c
@@ -176,13 +176,13 @@ static struct resource mem_res[] = {
.name = "Kernel code",
.start = 0,
.end = 0,
- .flags = IORESOURCE_MEM
+ .flags = IORESOURCE_SYSTEM_RAM
},
{
.name = "Kernel data",
.start = 0,
.end = 0,
- .flags = IORESOURCE_MEM
+ .flags = IORESOURCE_SYSTEM_RAM
}
};
@@ -851,7 +851,7 @@ static void __init request_standard_resources(const struct machine_desc *mdesc)
res->name = "System RAM";
res->start = __pfn_to_phys(memblock_region_memory_base_pfn(region));
res->end = __pfn_to_phys(memblock_region_memory_end_pfn(region)) - 1;
- res->flags = IORESOURCE_MEM | IORESOURCE_BUSY;
+ res->flags = IORESOURCE_SYSTEM_RAM | IORESOURCE_BUSY;
request_resource(&iomem_resource, res);
diff --git a/arch/arm/kernel/smp.c b/arch/arm/kernel/smp.c
index 37312f6749f3d..baee70267f293 100644
--- a/arch/arm/kernel/smp.c
+++ b/arch/arm/kernel/smp.c
@@ -409,7 +409,7 @@ asmlinkage void secondary_start_kernel(void)
/*
* OK, it's off to the idle thread for us
*/
- cpu_startup_entry(CPUHP_ONLINE);
+ cpu_startup_entry(CPUHP_AP_ONLINE_IDLE);
}
void __init smp_cpus_done(unsigned int max_cpus)
diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c
index dda1959f0ddeb..08e49c423c241 100644
--- a/arch/arm/kvm/arm.c
+++ b/arch/arm/kvm/arm.c
@@ -506,18 +506,18 @@ static void kvm_arm_resume_guest(struct kvm *kvm)
struct kvm_vcpu *vcpu;
kvm_for_each_vcpu(i, vcpu, kvm) {
- wait_queue_head_t *wq = kvm_arch_vcpu_wq(vcpu);
+ struct swait_queue_head *wq = kvm_arch_vcpu_wq(vcpu);
vcpu->arch.pause = false;
- wake_up_interruptible(wq);
+ swake_up(wq);
}
}
static void vcpu_sleep(struct kvm_vcpu *vcpu)
{
- wait_queue_head_t *wq = kvm_arch_vcpu_wq(vcpu);
+ struct swait_queue_head *wq = kvm_arch_vcpu_wq(vcpu);
- wait_event_interruptible(*wq, ((!vcpu->arch.power_off) &&
+ swait_event_interruptible(*wq, ((!vcpu->arch.power_off) &&
(!vcpu->arch.pause)));
}
diff --git a/arch/arm/kvm/guest.c b/arch/arm/kvm/guest.c
index 5fa69d7bae58a..99361f11354a0 100644
--- a/arch/arm/kvm/guest.c
+++ b/arch/arm/kvm/guest.c
@@ -161,7 +161,7 @@ static int get_timer_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg)
u64 val;
val = kvm_arm_timer_get_reg(vcpu, reg->id);
- return copy_to_user(uaddr, &val, KVM_REG_SIZE(reg->id));
+ return copy_to_user(uaddr, &val, KVM_REG_SIZE(reg->id)) ? -EFAULT : 0;
}
static unsigned long num_core_regs(void)
diff --git a/arch/arm/kvm/psci.c b/arch/arm/kvm/psci.c
index a9b3b905e661d..c2b131527a643 100644
--- a/arch/arm/kvm/psci.c
+++ b/arch/arm/kvm/psci.c
@@ -70,7 +70,7 @@ static unsigned long kvm_psci_vcpu_on(struct kvm_vcpu *source_vcpu)
{
struct kvm *kvm = source_vcpu->kvm;
struct kvm_vcpu *vcpu = NULL;
- wait_queue_head_t *wq;
+ struct swait_queue_head *wq;
unsigned long cpu_id;
unsigned long context_id;
phys_addr_t target_pc;
@@ -119,7 +119,7 @@ static unsigned long kvm_psci_vcpu_on(struct kvm_vcpu *source_vcpu)
smp_mb(); /* Make sure the above is visible */
wq = kvm_arch_vcpu_wq(vcpu);
- wake_up_interruptible(wq);
+ swake_up(wq);
return PSCI_RET_SUCCESS;
}
diff --git a/arch/arm/mach-lpc32xx/phy3250.c b/arch/arm/mach-lpc32xx/phy3250.c
index 77d6b1bab2782..ee06fabdf60e6 100644
--- a/arch/arm/mach-lpc32xx/phy3250.c
+++ b/arch/arm/mach-lpc32xx/phy3250.c
@@ -86,8 +86,8 @@ static int lpc32xx_clcd_setup(struct clcd_fb *fb)
{
dma_addr_t dma;
- fb->fb.screen_base = dma_alloc_writecombine(&fb->dev->dev,
- PANEL_SIZE, &dma, GFP_KERNEL);
+ fb->fb.screen_base = dma_alloc_wc(&fb->dev->dev, PANEL_SIZE, &dma,
+ GFP_KERNEL);
if (!fb->fb.screen_base) {
printk(KERN_ERR "CLCD: unable to map framebuffer\n");
return -ENOMEM;
@@ -116,15 +116,14 @@ static int lpc32xx_clcd_setup(struct clcd_fb *fb)
static int lpc32xx_clcd_mmap(struct clcd_fb *fb, struct vm_area_struct *vma)
{
- return dma_mmap_writecombine(&fb->dev->dev, vma,
- fb->fb.screen_base, fb->fb.fix.smem_start,
- fb->fb.fix.smem_len);
+ return dma_mmap_wc(&fb->dev->dev, vma, fb->fb.screen_base,
+ fb->fb.fix.smem_start, fb->fb.fix.smem_len);
}
static void lpc32xx_clcd_remove(struct clcd_fb *fb)
{
- dma_free_writecombine(&fb->dev->dev, fb->fb.fix.smem_len,
- fb->fb.screen_base, fb->fb.fix.smem_start);
+ dma_free_wc(&fb->dev->dev, fb->fb.fix.smem_len, fb->fb.screen_base,
+ fb->fb.fix.smem_start);
}
/*
diff --git a/arch/arm/mach-mvebu/Kconfig b/arch/arm/mach-mvebu/Kconfig
index 64e3d2ce9a076..b003e3afd6930 100644
--- a/arch/arm/mach-mvebu/Kconfig
+++ b/arch/arm/mach-mvebu/Kconfig
@@ -3,7 +3,6 @@ menuconfig ARCH_MVEBU
depends on ARCH_MULTI_V7 || ARCH_MULTI_V5
select ARCH_SUPPORTS_BIG_ENDIAN
select CLKSRC_MMIO
- select GENERIC_IRQ_CHIP
select PINCTRL
select PLAT_ORION
select SOC_BUS
@@ -29,6 +28,7 @@ config MACH_ARMADA_370
bool "Marvell Armada 370 boards"
depends on ARCH_MULTI_V7
select ARMADA_370_CLK
+ select ARMADA_370_XP_IRQ
select CPU_PJ4B
select MACH_MVEBU_V7
select PINCTRL_ARMADA_370
@@ -39,6 +39,7 @@ config MACH_ARMADA_370
config MACH_ARMADA_375
bool "Marvell Armada 375 boards"
depends on ARCH_MULTI_V7
+ select ARMADA_370_XP_IRQ
select ARM_ERRATA_720789
select ARM_ERRATA_753970
select ARM_GIC
@@ -58,6 +59,7 @@ config MACH_ARMADA_38X
select ARM_ERRATA_720789
select ARM_ERRATA_753970
select ARM_GIC
+ select ARMADA_370_XP_IRQ
select ARMADA_38X_CLK
select HAVE_ARM_SCU
select HAVE_ARM_TWD if SMP
@@ -72,6 +74,7 @@ config MACH_ARMADA_39X
bool "Marvell Armada 39x boards"
depends on ARCH_MULTI_V7
select ARM_GIC
+ select ARMADA_370_XP_IRQ
select ARMADA_39X_CLK
select CACHE_L2X0
select HAVE_ARM_SCU
@@ -86,6 +89,7 @@ config MACH_ARMADA_39X
config MACH_ARMADA_XP
bool "Marvell Armada XP boards"
depends on ARCH_MULTI_V7
+ select ARMADA_370_XP_IRQ
select ARMADA_XP_CLK
select CPU_PJ4B
select MACH_MVEBU_V7
diff --git a/arch/arm/mach-netx/fb.c b/arch/arm/mach-netx/fb.c
index d122ee6ab9913..8814ee5e98fd5 100644
--- a/arch/arm/mach-netx/fb.c
+++ b/arch/arm/mach-netx/fb.c
@@ -42,8 +42,8 @@ int netx_clcd_setup(struct clcd_fb *fb)
fb->panel = netx_panel;
- fb->fb.screen_base = dma_alloc_writecombine(&fb->dev->dev, 1024*1024,
- &dma, GFP_KERNEL);
+ fb->fb.screen_base = dma_alloc_wc(&fb->dev->dev, 1024 * 1024, &dma,
+ GFP_KERNEL);
if (!fb->fb.screen_base) {
printk(KERN_ERR "CLCD: unable to map framebuffer\n");
return -ENOMEM;
@@ -57,16 +57,14 @@ int netx_clcd_setup(struct clcd_fb *fb)
int netx_clcd_mmap(struct clcd_fb *fb, struct vm_area_struct *vma)
{
- return dma_mmap_writecombine(&fb->dev->dev, vma,
- fb->fb.screen_base,
- fb->fb.fix.smem_start,
- fb->fb.fix.smem_len);
+ return dma_mmap_wc(&fb->dev->dev, vma, fb->fb.screen_base,
+ fb->fb.fix.smem_start, fb->fb.fix.smem_len);
}
void netx_clcd_remove(struct clcd_fb *fb)
{
- dma_free_writecombine(&fb->dev->dev, fb->fb.fix.smem_len,
- fb->fb.screen_base, fb->fb.fix.smem_start);
+ dma_free_wc(&fb->dev->dev, fb->fb.fix.smem_len, fb->fb.screen_base,
+ fb->fb.fix.smem_start);
}
static AMBA_AHB_DEVICE(fb, "fb", 0, 0x00104000, { NETX_IRQ_LCD }, NULL);
diff --git a/arch/arm/mach-nspire/clcd.c b/arch/arm/mach-nspire/clcd.c
index abea12617b173..ea0e5b2ca1cd4 100644
--- a/arch/arm/mach-nspire/clcd.c
+++ b/arch/arm/mach-nspire/clcd.c
@@ -90,8 +90,8 @@ int nspire_clcd_setup(struct clcd_fb *fb)
panel_size = ((panel->mode.xres * panel->mode.yres) * panel->bpp) / 8;
panel_size = ALIGN(panel_size, PAGE_SIZE);
- fb->fb.screen_base = dma_alloc_writecombine(&fb->dev->dev,
- panel_size, &dma, GFP_KERNEL);
+ fb->fb.screen_base = dma_alloc_wc(&fb->dev->dev, panel_size, &dma,
+ GFP_KERNEL);
if (!fb->fb.screen_base) {
pr_err("CLCD: unable to map framebuffer\n");
@@ -107,13 +107,12 @@ int nspire_clcd_setup(struct clcd_fb *fb)
int nspire_clcd_mmap(struct clcd_fb *fb, struct vm_area_struct *vma)
{
- return dma_mmap_writecombine(&fb->dev->dev, vma,
- fb->fb.screen_base, fb->fb.fix.smem_start,
- fb->fb.fix.smem_len);
+ return dma_mmap_wc(&fb->dev->dev, vma, fb->fb.screen_base,
+ fb->fb.fix.smem_start, fb->fb.fix.smem_len);
}
void nspire_clcd_remove(struct clcd_fb *fb)
{
- dma_free_writecombine(&fb->dev->dev, fb->fb.fix.smem_len,
- fb->fb.screen_base, fb->fb.fix.smem_start);
+ dma_free_wc(&fb->dev->dev, fb->fb.fix.smem_len, fb->fb.screen_base,
+ fb->fb.fix.smem_start);
}
diff --git a/arch/arm/mach-omap2/omap_hwmod.c b/arch/arm/mach-omap2/omap_hwmod.c
index e9f65fec55c0b..b6d62e4cdfdda 100644
--- a/arch/arm/mach-omap2/omap_hwmod.c
+++ b/arch/arm/mach-omap2/omap_hwmod.c
@@ -2200,6 +2200,11 @@ static int _enable(struct omap_hwmod *oh)
*/
static int _idle(struct omap_hwmod *oh)
{
+ if (oh->flags & HWMOD_NO_IDLE) {
+ oh->_int_flags |= _HWMOD_SKIP_ENABLE;
+ return 0;
+ }
+
pr_debug("omap_hwmod: %s: idling\n", oh->name);
if (oh->_state != _HWMOD_STATE_ENABLED) {
@@ -2504,6 +2509,8 @@ static int __init _init(struct omap_hwmod *oh, void *data)
oh->flags |= HWMOD_INIT_NO_RESET;
if (of_find_property(np, "ti,no-idle-on-init", NULL))
oh->flags |= HWMOD_INIT_NO_IDLE;
+ if (of_find_property(np, "ti,no-idle", NULL))
+ oh->flags |= HWMOD_NO_IDLE;
}
oh->_state = _HWMOD_STATE_INITIALIZED;
@@ -2630,7 +2637,7 @@ static void __init _setup_postsetup(struct omap_hwmod *oh)
* XXX HWMOD_INIT_NO_IDLE does not belong in hwmod data -
* it should be set by the core code as a runtime flag during startup
*/
- if ((oh->flags & HWMOD_INIT_NO_IDLE) &&
+ if ((oh->flags & (HWMOD_INIT_NO_IDLE | HWMOD_NO_IDLE)) &&
(postsetup_state == _HWMOD_STATE_IDLE)) {
oh->_int_flags |= _HWMOD_SKIP_ENABLE;
postsetup_state = _HWMOD_STATE_ENABLED;
diff --git a/arch/arm/mach-omap2/omap_hwmod.h b/arch/arm/mach-omap2/omap_hwmod.h
index 76bce11c85a40..7c7a31169475b 100644
--- a/arch/arm/mach-omap2/omap_hwmod.h
+++ b/arch/arm/mach-omap2/omap_hwmod.h
@@ -525,6 +525,8 @@ struct omap_hwmod_omap4_prcm {
* or idled.
* HWMOD_OPT_CLKS_NEEDED: The optional clocks are needed for the module to
* operate and they need to be handled at the same time as the main_clk.
+ * HWMOD_NO_IDLE: Do not idle the hwmod at all. Useful to handle certain
+ * IPs like CPSW on DRA7, where clocks to this module cannot be disabled.
*/
#define HWMOD_SWSUP_SIDLE (1 << 0)
#define HWMOD_SWSUP_MSTANDBY (1 << 1)
@@ -541,6 +543,7 @@ struct omap_hwmod_omap4_prcm {
#define HWMOD_SWSUP_SIDLE_ACT (1 << 12)
#define HWMOD_RECONFIG_IO_CHAIN (1 << 13)
#define HWMOD_OPT_CLKS_NEEDED (1 << 14)
+#define HWMOD_NO_IDLE (1 << 15)
/*
* omap_hwmod._int_flags definitions
diff --git a/arch/arm/mm/pageattr.c b/arch/arm/mm/pageattr.c
index cf30daff89325..d19b1ad29b075 100644
--- a/arch/arm/mm/pageattr.c
+++ b/arch/arm/mm/pageattr.c
@@ -49,6 +49,9 @@ static int change_memory_common(unsigned long addr, int numpages,
WARN_ON_ONCE(1);
}
+ if (!numpages)
+ return 0;
+
if (start < MODULES_VADDR || start >= MODULES_END)
return -EINVAL;
diff --git a/arch/arm/plat-samsung/pm-check.c b/arch/arm/plat-samsung/pm-check.c
index 04aff2c31b460..70f2f699bed3e 100644
--- a/arch/arm/plat-samsung/pm-check.c
+++ b/arch/arm/plat-samsung/pm-check.c
@@ -53,8 +53,8 @@ static void s3c_pm_run_res(struct resource *ptr, run_fn_t fn, u32 *arg)
if (ptr->child != NULL)
s3c_pm_run_res(ptr->child, fn, arg);
- if ((ptr->flags & IORESOURCE_MEM) &&
- strcmp(ptr->name, "System RAM") == 0) {
+ if ((ptr->flags & IORESOURCE_SYSTEM_RAM)
+ == IORESOURCE_SYSTEM_RAM) {
S3C_PMDBG("Found system RAM at %08lx..%08lx\n",
(unsigned long)ptr->start,
(unsigned long)ptr->end);
diff --git a/arch/arm/vdso/vdso.S b/arch/arm/vdso/vdso.S
index b2b97e3e7babb..a62a7b64f49c5 100644
--- a/arch/arm/vdso/vdso.S
+++ b/arch/arm/vdso/vdso.S
@@ -23,9 +23,8 @@
#include
#include
- __PAGE_ALIGNED_DATA
-
.globl vdso_start, vdso_end
+ .section .data..ro_after_init
.balign PAGE_SIZE
vdso_start:
.incbin "arch/arm/vdso/vdso.so"
diff --git a/arch/arm64/include/asm/cacheflush.h b/arch/arm64/include/asm/cacheflush.h
index 7fc294c3bc5ba..22dda613f9c91 100644
--- a/arch/arm64/include/asm/cacheflush.h
+++ b/arch/arm64/include/asm/cacheflush.h
@@ -156,8 +156,4 @@ int set_memory_rw(unsigned long addr, int numpages);
int set_memory_x(unsigned long addr, int numpages);
int set_memory_nx(unsigned long addr, int numpages);
-#ifdef CONFIG_DEBUG_RODATA
-void mark_rodata_ro(void);
-#endif
-
#endif
diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h
index bf464de33f52f..819aff5d593f7 100644
--- a/arch/arm64/include/asm/pgtable.h
+++ b/arch/arm64/include/asm/pgtable.h
@@ -34,7 +34,7 @@
/*
* VMALLOC and SPARSEMEM_VMEMMAP ranges.
*
- * VMEMAP_SIZE: allows the whole VA space to be covered by a struct page array
+ * VMEMAP_SIZE: allows the whole linear region to be covered by a struct page array
* (rounded up to PUD_SIZE).
* VMALLOC_START: beginning of the kernel VA space
* VMALLOC_END: extends to the available space below vmmemmap, PCI I/O space,
@@ -51,7 +51,9 @@
#define VMALLOC_END (PAGE_OFFSET - PUD_SIZE - VMEMMAP_SIZE - SZ_64K)
-#define vmemmap ((struct page *)(VMALLOC_END + SZ_64K))
+#define VMEMMAP_START (VMALLOC_END + SZ_64K)
+#define vmemmap ((struct page *)VMEMMAP_START - \
+ SECTION_ALIGN_DOWN(memstart_addr >> PAGE_SHIFT))
#define FIRST_USER_ADDRESS 0UL
diff --git a/arch/arm64/kernel/setup.c b/arch/arm64/kernel/setup.c
index 8119479147db1..450987d99b9b9 100644
--- a/arch/arm64/kernel/setup.c
+++ b/arch/arm64/kernel/setup.c
@@ -73,13 +73,13 @@ static struct resource mem_res[] = {
.name = "Kernel code",
.start = 0,
.end = 0,
- .flags = IORESOURCE_MEM
+ .flags = IORESOURCE_SYSTEM_RAM
},
{
.name = "Kernel data",
.start = 0,
.end = 0,
- .flags = IORESOURCE_MEM
+ .flags = IORESOURCE_SYSTEM_RAM
}
};
@@ -210,7 +210,7 @@ static void __init request_standard_resources(void)
res->name = "System RAM";
res->start = __pfn_to_phys(memblock_region_memory_base_pfn(region));
res->end = __pfn_to_phys(memblock_region_memory_end_pfn(region)) - 1;
- res->flags = IORESOURCE_MEM | IORESOURCE_BUSY;
+ res->flags = IORESOURCE_SYSTEM_RAM | IORESOURCE_BUSY;
request_resource(&iomem_resource, res);
diff --git a/arch/arm64/kernel/sleep.S b/arch/arm64/kernel/sleep.S
index e33fe33876ab3..fd10eb6638689 100644
--- a/arch/arm64/kernel/sleep.S
+++ b/arch/arm64/kernel/sleep.S
@@ -145,6 +145,10 @@ ENTRY(cpu_resume_mmu)
ENDPROC(cpu_resume_mmu)
.popsection
cpu_resume_after_mmu:
+#ifdef CONFIG_KASAN
+ mov x0, sp
+ bl kasan_unpoison_remaining_stack
+#endif
mov x0, #0 // return zero on success
ldp x19, x20, [sp, #16]
ldp x21, x22, [sp, #32]
diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c
index b1adc51b2c2e7..460765799c642 100644
--- a/arch/arm64/kernel/smp.c
+++ b/arch/arm64/kernel/smp.c
@@ -195,7 +195,7 @@ asmlinkage void secondary_start_kernel(void)
/*
* OK, it's off to the idle thread for us
*/
- cpu_startup_entry(CPUHP_ONLINE);
+ cpu_startup_entry(CPUHP_AP_ONLINE_IDLE);
}
#ifdef CONFIG_HOTPLUG_CPU
diff --git a/arch/arm64/kvm/guest.c b/arch/arm64/kvm/guest.c
index fcb778899a380..9e54ad7c240ac 100644
--- a/arch/arm64/kvm/guest.c
+++ b/arch/arm64/kvm/guest.c
@@ -194,7 +194,7 @@ static int get_timer_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg)
u64 val;
val = kvm_arm_timer_get_reg(vcpu, reg->id);
- return copy_to_user(uaddr, &val, KVM_REG_SIZE(reg->id));
+ return copy_to_user(uaddr, &val, KVM_REG_SIZE(reg->id)) ? -EFAULT : 0;
}
/**
diff --git a/arch/arm64/mm/hugetlbpage.c b/arch/arm64/mm/hugetlbpage.c
index 82d607c3614ed..da30529bb1f65 100644
--- a/arch/arm64/mm/hugetlbpage.c
+++ b/arch/arm64/mm/hugetlbpage.c
@@ -306,10 +306,6 @@ static __init int setup_hugepagesz(char *opt)
hugetlb_add_hstate(PMD_SHIFT - PAGE_SHIFT);
} else if (ps == PUD_SIZE) {
hugetlb_add_hstate(PUD_SHIFT - PAGE_SHIFT);
- } else if (ps == (PAGE_SIZE * CONT_PTES)) {
- hugetlb_add_hstate(CONT_PTE_SHIFT);
- } else if (ps == (PMD_SIZE * CONT_PMDS)) {
- hugetlb_add_hstate((PMD_SHIFT + CONT_PMD_SHIFT) - PAGE_SHIFT);
} else {
pr_err("hugepagesz: Unsupported page size %lu K\n", ps >> 10);
return 0;
@@ -317,13 +313,3 @@ static __init int setup_hugepagesz(char *opt)
return 1;
}
__setup("hugepagesz=", setup_hugepagesz);
-
-#ifdef CONFIG_ARM64_64K_PAGES
-static __init int add_default_hugepagesz(void)
-{
- if (size_to_hstate(CONT_PTES * PAGE_SIZE) == NULL)
- hugetlb_add_hstate(CONT_PMD_SHIFT);
- return 0;
-}
-arch_initcall(add_default_hugepagesz);
-#endif
diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c
index f3b061e67bfe0..7802f216a67a5 100644
--- a/arch/arm64/mm/init.c
+++ b/arch/arm64/mm/init.c
@@ -319,8 +319,8 @@ void __init mem_init(void)
#endif
MLG(VMALLOC_START, VMALLOC_END),
#ifdef CONFIG_SPARSEMEM_VMEMMAP
- MLG((unsigned long)vmemmap,
- (unsigned long)vmemmap + VMEMMAP_SIZE),
+ MLG(VMEMMAP_START,
+ VMEMMAP_START + VMEMMAP_SIZE),
MLM((unsigned long)virt_to_page(PAGE_OFFSET),
(unsigned long)virt_to_page(high_memory)),
#endif
diff --git a/arch/avr32/kernel/setup.c b/arch/avr32/kernel/setup.c
index 209ae5ad34958..e6928896da2a4 100644
--- a/arch/avr32/kernel/setup.c
+++ b/arch/avr32/kernel/setup.c
@@ -49,13 +49,13 @@ static struct resource __initdata kernel_data = {
.name = "Kernel data",
.start = 0,
.end = 0,
- .flags = IORESOURCE_MEM,
+ .flags = IORESOURCE_SYSTEM_RAM,
};
static struct resource __initdata kernel_code = {
.name = "Kernel code",
.start = 0,
.end = 0,
- .flags = IORESOURCE_MEM,
+ .flags = IORESOURCE_SYSTEM_RAM,
.sibling = &kernel_data,
};
@@ -134,7 +134,7 @@ add_physical_memory(resource_size_t start, resource_size_t end)
new->start = start;
new->end = end;
new->name = "System RAM";
- new->flags = IORESOURCE_MEM;
+ new->flags = IORESOURCE_SYSTEM_RAM;
*pprev = new;
}
diff --git a/arch/blackfin/mach-common/smp.c b/arch/blackfin/mach-common/smp.c
index 0030e21cfceb3..23c4ef5f8bdce 100644
--- a/arch/blackfin/mach-common/smp.c
+++ b/arch/blackfin/mach-common/smp.c
@@ -333,7 +333,7 @@ void secondary_start_kernel(void)
/* We are done with local CPU inits, unblock the boot CPU. */
set_cpu_online(cpu, true);
- cpu_startup_entry(CPUHP_ONLINE);
+ cpu_startup_entry(CPUHP_AP_ONLINE_IDLE);
}
void __init smp_prepare_boot_cpu(void)
diff --git a/arch/c6x/kernel/setup.c b/arch/c6x/kernel/setup.c
index 72e17f7ebd6ff..786e36e2f61de 100644
--- a/arch/c6x/kernel/setup.c
+++ b/arch/c6x/kernel/setup.c
@@ -281,8 +281,6 @@ notrace void __init machine_init(unsigned long dt_ptr)
*/
set_ist(_vectors_start);
- lockdep_init();
-
/*
* dtb is passed in from bootloader.
* fdt is linked in blob.
diff --git a/arch/hexagon/kernel/smp.c b/arch/hexagon/kernel/smp.c
index ff759f26b96ae..983bae7d2665c 100644
--- a/arch/hexagon/kernel/smp.c
+++ b/arch/hexagon/kernel/smp.c
@@ -180,7 +180,7 @@ void start_secondary(void)
local_irq_enable();
- cpu_startup_entry(CPUHP_ONLINE);
+ cpu_startup_entry(CPUHP_AP_ONLINE_IDLE);
}
diff --git a/arch/ia64/kernel/efi.c b/arch/ia64/kernel/efi.c
index caae3f4e4341a..300dac3702f11 100644
--- a/arch/ia64/kernel/efi.c
+++ b/arch/ia64/kernel/efi.c
@@ -1178,7 +1178,7 @@ efi_initialize_iomem_resources(struct resource *code_resource,
efi_memory_desc_t *md;
u64 efi_desc_size;
char *name;
- unsigned long flags;
+ unsigned long flags, desc;
efi_map_start = __va(ia64_boot_param->efi_memmap);
efi_map_end = efi_map_start + ia64_boot_param->efi_memmap_size;
@@ -1193,6 +1193,8 @@ efi_initialize_iomem_resources(struct resource *code_resource,
continue;
flags = IORESOURCE_MEM | IORESOURCE_BUSY;
+ desc = IORES_DESC_NONE;
+
switch (md->type) {
case EFI_MEMORY_MAPPED_IO:
@@ -1207,14 +1209,17 @@ efi_initialize_iomem_resources(struct resource *code_resource,
if (md->attribute & EFI_MEMORY_WP) {
name = "System ROM";
flags |= IORESOURCE_READONLY;
- } else if (md->attribute == EFI_MEMORY_UC)
+ } else if (md->attribute == EFI_MEMORY_UC) {
name = "Uncached RAM";
- else
+ } else {
name = "System RAM";
+ flags |= IORESOURCE_SYSRAM;
+ }
break;
case EFI_ACPI_MEMORY_NVS:
name = "ACPI Non-volatile Storage";
+ desc = IORES_DESC_ACPI_NV_STORAGE;
break;
case EFI_UNUSABLE_MEMORY:
@@ -1224,6 +1229,7 @@ efi_initialize_iomem_resources(struct resource *code_resource,
case EFI_PERSISTENT_MEMORY:
name = "Persistent Memory";
+ desc = IORES_DESC_PERSISTENT_MEMORY;
break;
case EFI_RESERVED_TYPE:
@@ -1246,6 +1252,7 @@ efi_initialize_iomem_resources(struct resource *code_resource,
res->start = md->phys_addr;
res->end = md->phys_addr + efi_md_size(md) - 1;
res->flags = flags;
+ res->desc = desc;
if (insert_resource(&iomem_resource, res) < 0)
kfree(res);
diff --git a/arch/ia64/kernel/setup.c b/arch/ia64/kernel/setup.c
index 4f118b0d30915..2029a38a72aee 100644
--- a/arch/ia64/kernel/setup.c
+++ b/arch/ia64/kernel/setup.c
@@ -80,17 +80,17 @@ unsigned long vga_console_membase;
static struct resource data_resource = {
.name = "Kernel data",
- .flags = IORESOURCE_BUSY | IORESOURCE_MEM
+ .flags = IORESOURCE_BUSY | IORESOURCE_SYSTEM_RAM
};
static struct resource code_resource = {
.name = "Kernel code",
- .flags = IORESOURCE_BUSY | IORESOURCE_MEM
+ .flags = IORESOURCE_BUSY | IORESOURCE_SYSTEM_RAM
};
static struct resource bss_resource = {
.name = "Kernel bss",
- .flags = IORESOURCE_BUSY | IORESOURCE_MEM
+ .flags = IORESOURCE_BUSY | IORESOURCE_SYSTEM_RAM
};
unsigned long ia64_max_cacheline_size;
diff --git a/arch/ia64/kernel/smpboot.c b/arch/ia64/kernel/smpboot.c
index 0e76fad27975d..74fe317477e62 100644
--- a/arch/ia64/kernel/smpboot.c
+++ b/arch/ia64/kernel/smpboot.c
@@ -454,7 +454,7 @@ start_secondary (void *unused)
preempt_disable();
smp_callin();
- cpu_startup_entry(CPUHP_ONLINE);
+ cpu_startup_entry(CPUHP_AP_ONLINE_IDLE);
return 0;
}
diff --git a/arch/m32r/kernel/setup.c b/arch/m32r/kernel/setup.c
index a5ecef7188baa..136c69f1fb8ab 100644
--- a/arch/m32r/kernel/setup.c
+++ b/arch/m32r/kernel/setup.c
@@ -70,14 +70,14 @@ static struct resource data_resource = {
.name = "Kernel data",
.start = 0,
.end = 0,
- .flags = IORESOURCE_BUSY | IORESOURCE_MEM
+ .flags = IORESOURCE_BUSY | IORESOURCE_SYSTEM_RAM
};
static struct resource code_resource = {
.name = "Kernel code",
.start = 0,
.end = 0,
- .flags = IORESOURCE_BUSY | IORESOURCE_MEM
+ .flags = IORESOURCE_BUSY | IORESOURCE_SYSTEM_RAM
};
unsigned long memory_start;
diff --git a/arch/m32r/kernel/smpboot.c b/arch/m32r/kernel/smpboot.c
index a468467542f45..f98d2f6519d63 100644
--- a/arch/m32r/kernel/smpboot.c
+++ b/arch/m32r/kernel/smpboot.c
@@ -432,7 +432,7 @@ int __init start_secondary(void *unused)
*/
local_flush_tlb_all();
- cpu_startup_entry(CPUHP_ONLINE);
+ cpu_startup_entry(CPUHP_AP_ONLINE_IDLE);
return 0;
}
diff --git a/arch/metag/kernel/smp.c b/arch/metag/kernel/smp.c
index c3c6f08648816..bad13232de518 100644
--- a/arch/metag/kernel/smp.c
+++ b/arch/metag/kernel/smp.c
@@ -396,7 +396,7 @@ asmlinkage void secondary_start_kernel(void)
/*
* OK, it's off to the idle thread for us
*/
- cpu_startup_entry(CPUHP_ONLINE);
+ cpu_startup_entry(CPUHP_AP_ONLINE_IDLE);
}
void __init smp_cpus_done(unsigned int max_cpus)
diff --git a/arch/microblaze/kernel/setup.c b/arch/microblaze/kernel/setup.c
index 89a2a93949274..f31ebb5dc26c2 100644
--- a/arch/microblaze/kernel/setup.c
+++ b/arch/microblaze/kernel/setup.c
@@ -130,8 +130,6 @@ void __init machine_early_init(const char *cmdline, unsigned int ram,
memset(__bss_start, 0, __bss_stop-__bss_start);
memset(_ssbss, 0, _esbss-_ssbss);
- lockdep_init();
-
/* initialize device tree for usage in early_printk */
early_init_devtree(_fdt_start);
diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig
index 74a3db92da1b5..a65eacf59918e 100644
--- a/arch/mips/Kconfig
+++ b/arch/mips/Kconfig
@@ -151,6 +151,7 @@ config BMIPS_GENERIC
select CSRC_R4K
select SYNC_R4K
select COMMON_CLK
+ select BCM6345_L1_IRQ
select BCM7038_L1_IRQ
select BCM7120_L2_IRQ
select BRCMSTB_L2_IRQ
@@ -2169,7 +2170,6 @@ config MIPS_MT_SMP
select CPU_MIPSR2_IRQ_VI
select CPU_MIPSR2_IRQ_EI
select SYNC_R4K
- select MIPS_GIC_IPI
select MIPS_MT
select SMP
select SMP_UP
@@ -2267,7 +2267,6 @@ config MIPS_VPE_APSP_API_MT
config MIPS_CMP
bool "MIPS CMP framework support (DEPRECATED)"
depends on SYS_SUPPORTS_MIPS_CMP && !CPU_MIPSR6
- select MIPS_GIC_IPI
select SMP
select SYNC_R4K
select SYS_SUPPORTS_SMP
@@ -2287,7 +2286,6 @@ config MIPS_CPS
select MIPS_CM
select MIPS_CPC
select MIPS_CPS_PM if HOTPLUG_CPU
- select MIPS_GIC_IPI
select SMP
select SYNC_R4K if (CEVT_R4K || CSRC_R4K)
select SYS_SUPPORTS_HOTPLUG_CPU
@@ -2305,9 +2303,6 @@ config MIPS_CPS_PM
select MIPS_CPC
bool
-config MIPS_GIC_IPI
- bool
-
config MIPS_CM
bool
diff --git a/arch/mips/ath79/irq.c b/arch/mips/ath79/irq.c
index 511c06560dc1f..2dfff1f190043 100644
--- a/arch/mips/ath79/irq.c
+++ b/arch/mips/ath79/irq.c
@@ -26,90 +26,6 @@
#include "common.h"
#include "machtypes.h"
-static void __init ath79_misc_intc_domain_init(
- struct device_node *node, int irq);
-
-static void ath79_misc_irq_handler(struct irq_desc *desc)
-{
- struct irq_domain *domain = irq_desc_get_handler_data(desc);
- void __iomem *base = domain->host_data;
- u32 pending;
-
- pending = __raw_readl(base + AR71XX_RESET_REG_MISC_INT_STATUS) &
- __raw_readl(base + AR71XX_RESET_REG_MISC_INT_ENABLE);
-
- if (!pending) {
- spurious_interrupt();
- return;
- }
-
- while (pending) {
- int bit = __ffs(pending);
-
- generic_handle_irq(irq_linear_revmap(domain, bit));
- pending &= ~BIT(bit);
- }
-}
-
-static void ar71xx_misc_irq_unmask(struct irq_data *d)
-{
- void __iomem *base = irq_data_get_irq_chip_data(d);
- unsigned int irq = d->hwirq;
- u32 t;
-
- t = __raw_readl(base + AR71XX_RESET_REG_MISC_INT_ENABLE);
- __raw_writel(t | (1 << irq), base + AR71XX_RESET_REG_MISC_INT_ENABLE);
-
- /* flush write */
- __raw_readl(base + AR71XX_RESET_REG_MISC_INT_ENABLE);
-}
-
-static void ar71xx_misc_irq_mask(struct irq_data *d)
-{
- void __iomem *base = irq_data_get_irq_chip_data(d);
- unsigned int irq = d->hwirq;
- u32 t;
-
- t = __raw_readl(base + AR71XX_RESET_REG_MISC_INT_ENABLE);
- __raw_writel(t & ~(1 << irq), base + AR71XX_RESET_REG_MISC_INT_ENABLE);
-
- /* flush write */
- __raw_readl(base + AR71XX_RESET_REG_MISC_INT_ENABLE);
-}
-
-static void ar724x_misc_irq_ack(struct irq_data *d)
-{
- void __iomem *base = irq_data_get_irq_chip_data(d);
- unsigned int irq = d->hwirq;
- u32 t;
-
- t = __raw_readl(base + AR71XX_RESET_REG_MISC_INT_STATUS);
- __raw_writel(t & ~(1 << irq), base + AR71XX_RESET_REG_MISC_INT_STATUS);
-
- /* flush write */
- __raw_readl(base + AR71XX_RESET_REG_MISC_INT_STATUS);
-}
-
-static struct irq_chip ath79_misc_irq_chip = {
- .name = "MISC",
- .irq_unmask = ar71xx_misc_irq_unmask,
- .irq_mask = ar71xx_misc_irq_mask,
-};
-
-static void __init ath79_misc_irq_init(void)
-{
- if (soc_is_ar71xx() || soc_is_ar913x())
- ath79_misc_irq_chip.irq_mask_ack = ar71xx_misc_irq_mask;
- else if (soc_is_ar724x() ||
- soc_is_ar933x() ||
- soc_is_ar934x() ||
- soc_is_qca955x())
- ath79_misc_irq_chip.irq_ack = ar724x_misc_irq_ack;
- else
- BUG();
-
- ath79_misc_intc_domain_init(NULL, ATH79_CPU_IRQ(6));
-}
static void ar934x_ip2_irq_dispatch(struct irq_desc *desc)
{
@@ -212,142 +128,12 @@ static void qca955x_irq_init(void)
irq_set_chained_handler(ATH79_CPU_IRQ(3), qca955x_ip3_irq_dispatch);
}
-/*
- * The IP2/IP3 lines are tied to a PCI/WMAC/USB device. Drivers for
- * these devices typically allocate coherent DMA memory, however the
- * DMA controller may still have some unsynchronized data in the FIFO.
- * Issue a flush in the handlers to ensure that the driver sees
- * the update.
- *
- * This array map the interrupt lines to the DDR write buffer channels.
- */
-
-static unsigned irq_wb_chan[8] = {
- -1, -1, -1, -1, -1, -1, -1, -1,
-};
-
-asmlinkage void plat_irq_dispatch(void)
-{
- unsigned long pending;
- int irq;
-
- pending = read_c0_status() & read_c0_cause() & ST0_IM;
-
- if (!pending) {
- spurious_interrupt();
- return;
- }
-
- pending >>= CAUSEB_IP;
- while (pending) {
- irq = fls(pending) - 1;
- if (irq < ARRAY_SIZE(irq_wb_chan) && irq_wb_chan[irq] != -1)
- ath79_ddr_wb_flush(irq_wb_chan[irq]);
- do_IRQ(MIPS_CPU_IRQ_BASE + irq);
- pending &= ~BIT(irq);
- }
-}
-
-static int misc_map(struct irq_domain *d, unsigned int irq, irq_hw_number_t hw)
-{
- irq_set_chip_and_handler(irq, &ath79_misc_irq_chip, handle_level_irq);
- irq_set_chip_data(irq, d->host_data);
- return 0;
-}
-
-static const struct irq_domain_ops misc_irq_domain_ops = {
- .xlate = irq_domain_xlate_onecell,
- .map = misc_map,
-};
-
-static void __init ath79_misc_intc_domain_init(
- struct device_node *node, int irq)
-{
- void __iomem *base = ath79_reset_base;
- struct irq_domain *domain;
-
- domain = irq_domain_add_legacy(node, ATH79_MISC_IRQ_COUNT,
- ATH79_MISC_IRQ_BASE, 0, &misc_irq_domain_ops, base);
- if (!domain)
- panic("Failed to add MISC irqdomain");
-
- /* Disable and clear all interrupts */
- __raw_writel(0, base + AR71XX_RESET_REG_MISC_INT_ENABLE);
- __raw_writel(0, base + AR71XX_RESET_REG_MISC_INT_STATUS);
-
- irq_set_chained_handler_and_data(irq, ath79_misc_irq_handler, domain);
-}
-
-static int __init ath79_misc_intc_of_init(
- struct device_node *node, struct device_node *parent)
-{
- int irq;
-
- irq = irq_of_parse_and_map(node, 0);
- if (!irq)
- panic("Failed to get MISC IRQ");
-
- ath79_misc_intc_domain_init(node, irq);
- return 0;
-}
-
-static int __init ar7100_misc_intc_of_init(
- struct device_node *node, struct device_node *parent)
-{
- ath79_misc_irq_chip.irq_mask_ack = ar71xx_misc_irq_mask;
- return ath79_misc_intc_of_init(node, parent);
-}
-
-IRQCHIP_DECLARE(ar7100_misc_intc, "qca,ar7100-misc-intc",
- ar7100_misc_intc_of_init);
-
-static int __init ar7240_misc_intc_of_init(
- struct device_node *node, struct device_node *parent)
-{
- ath79_misc_irq_chip.irq_ack = ar724x_misc_irq_ack;
- return ath79_misc_intc_of_init(node, parent);
-}
-
-IRQCHIP_DECLARE(ar7240_misc_intc, "qca,ar7240-misc-intc",
- ar7240_misc_intc_of_init);
-
-static int __init ar79_cpu_intc_of_init(
- struct device_node *node, struct device_node *parent)
-{
- int err, i, count;
-
- /* Fill the irq_wb_chan table */
- count = of_count_phandle_with_args(
- node, "qca,ddr-wb-channels", "#qca,ddr-wb-channel-cells");
-
- for (i = 0; i < count; i++) {
- struct of_phandle_args args;
- u32 irq = i;
-
- of_property_read_u32_index(
- node, "qca,ddr-wb-channel-interrupts", i, &irq);
- if (irq >= ARRAY_SIZE(irq_wb_chan))
- continue;
-
- err = of_parse_phandle_with_args(
- node, "qca,ddr-wb-channels",
- "#qca,ddr-wb-channel-cells",
- i, &args);
- if (err)
- return err;
-
- irq_wb_chan[irq] = args.args[0];
- pr_info("IRQ: Set flush channel of IRQ%d to %d\n",
- irq, args.args[0]);
- }
-
- return mips_cpu_irq_of_init(node, parent);
-}
-IRQCHIP_DECLARE(ar79_cpu_intc, "qca,ar7100-cpu-intc",
- ar79_cpu_intc_of_init);
-
void __init arch_init_irq(void)
{
+ unsigned irq_wb_chan2 = -1;
+ unsigned irq_wb_chan3 = -1;
+ bool misc_is_ar71xx;
+
if (mips_machtype == ATH79_MACH_GENERIC_OF) {
irqchip_init();
return;
@@ -355,14 +141,26 @@ void __init arch_init_irq(void)
if (soc_is_ar71xx() || soc_is_ar724x() ||
soc_is_ar913x() || soc_is_ar933x()) {
- irq_wb_chan[2] = 3;
- irq_wb_chan[3] = 2;
+ irq_wb_chan2 = 3;
+ irq_wb_chan3 = 2;
} else if (soc_is_ar934x()) {
- irq_wb_chan[3] = 2;
+ irq_wb_chan3 = 2;
}
- mips_cpu_irq_init();
- ath79_misc_irq_init();
+ ath79_cpu_irq_init(irq_wb_chan2, irq_wb_chan3);
+
+ if (soc_is_ar71xx() || soc_is_ar913x())
+ misc_is_ar71xx = true;
+ else if (soc_is_ar724x() ||
+ soc_is_ar933x() ||
+ soc_is_ar934x() ||
+ soc_is_qca955x())
+ misc_is_ar71xx = false;
+ else
+ BUG();
+ ath79_misc_irq_init(
+ ath79_reset_base + AR71XX_RESET_REG_MISC_INT_STATUS,
+ ATH79_CPU_IRQ(6), ATH79_MISC_IRQ_BASE, misc_is_ar71xx);
if (soc_is_ar934x())
ar934x_ip2_irq_init();
diff --git a/arch/mips/bmips/irq.c b/arch/mips/bmips/irq.c
index e7fc6f9348baa..7efefcf440338 100644
--- a/arch/mips/bmips/irq.c
+++ b/arch/mips/bmips/irq.c
@@ -15,6 +15,12 @@
#include
#include
+static const struct of_device_id smp_intc_dt_match[] = {
+ { .compatible = "brcm,bcm7038-l1-intc" },
+ { .compatible = "brcm,bcm6345-l1-intc" },
+ {}
+};
+
unsigned int get_c0_compare_int(void)
{
return CP0_LEGACY_COMPARE_IRQ;
@@ -24,8 +30,8 @@ void __init arch_init_irq(void)
{
struct device_node *dn;
- /* Only the STB (bcm7038) controller supports SMP IRQ affinity */
- dn = of_find_compatible_node(NULL, NULL, "brcm,bcm7038-l1-intc");
+ /* Only these controllers support SMP IRQ affinity */
+ dn = of_find_matching_node(NULL, smp_intc_dt_match);
if (dn)
of_node_put(dn);
else
diff --git a/arch/mips/boot/compressed/uart-16550.c b/arch/mips/boot/compressed/uart-16550.c
index 408799a839b42..f7521142deda5 100644
--- a/arch/mips/boot/compressed/uart-16550.c
+++ b/arch/mips/boot/compressed/uart-16550.c
@@ -17,7 +17,7 @@
#define PORT(offset) (CKSEG1ADDR(AR7_REGS_UART0) + (4 * offset))
#endif
-#ifdef CONFIG_MACH_JZ4740
+#if defined(CONFIG_MACH_JZ4740) || defined(CONFIG_MACH_JZ4780)
#include
#define PORT(offset) (CKSEG1ADDR(JZ4740_UART0_BASE_ADDR) + (4 * offset))
#endif
diff --git a/arch/mips/include/asm/mach-ath79/ath79.h b/arch/mips/include/asm/mach-ath79/ath79.h
index 2b3487213d1e9..441faa92c3cd4 100644
--- a/arch/mips/include/asm/mach-ath79/ath79.h
+++ b/arch/mips/include/asm/mach-ath79/ath79.h
@@ -144,4 +144,8 @@ static inline u32 ath79_reset_rr(unsigned reg)
void ath79_device_reset_set(u32 mask);
void ath79_device_reset_clear(u32 mask);
+void ath79_cpu_irq_init(unsigned irq_wb_chan2, unsigned irq_wb_chan3);
+void ath79_misc_irq_init(void __iomem *regs, int irq,
+ int irq_base, bool is_ar71xx);
+
#endif /* __ASM_MACH_ATH79_H */
diff --git a/arch/mips/include/asm/smp-ops.h b/arch/mips/include/asm/smp-ops.h
index 6ba1fb8b11e22..db7c322f057f9 100644
--- a/arch/mips/include/asm/smp-ops.h
+++ b/arch/mips/include/asm/smp-ops.h
@@ -44,8 +44,9 @@ static inline void plat_smp_setup(void)
mp_ops->smp_setup();
}
-extern void gic_send_ipi_single(int cpu, unsigned int action);
-extern void gic_send_ipi_mask(const struct cpumask *mask, unsigned int action);
+extern void mips_smp_send_ipi_single(int cpu, unsigned int action);
+extern void mips_smp_send_ipi_mask(const struct cpumask *mask,
+ unsigned int action);
#else /* !CONFIG_SMP */
diff --git a/arch/mips/jz4740/gpio.c b/arch/mips/jz4740/gpio.c
index 8c6d76c9b2d69..d9907e57e9b98 100644
--- a/arch/mips/jz4740/gpio.c
+++ b/arch/mips/jz4740/gpio.c
@@ -270,7 +270,7 @@ uint32_t jz_gpio_port_get_value(int port, uint32_t mask)
}
EXPORT_SYMBOL(jz_gpio_port_get_value);
-#define IRQ_TO_BIT(irq) BIT(irq_to_gpio(irq) & 0x1f)
+#define IRQ_TO_BIT(irq) BIT((irq - JZ4740_IRQ_GPIO(0)) & 0x1f)
static void jz_gpio_check_trigger_both(struct jz_gpio_chip *chip, unsigned int irq)
{
diff --git a/arch/mips/kernel/Makefile b/arch/mips/kernel/Makefile
index 68e2b7db9348f..b0988fd62fcc3 100644
--- a/arch/mips/kernel/Makefile
+++ b/arch/mips/kernel/Makefile
@@ -52,7 +52,6 @@ obj-$(CONFIG_MIPS_MT_SMP) += smp-mt.o
obj-$(CONFIG_MIPS_CMP) += smp-cmp.o
obj-$(CONFIG_MIPS_CPS) += smp-cps.o cps-vec.o
obj-$(CONFIG_MIPS_CPS_NS16550) += cps-vec-ns16550.o
-obj-$(CONFIG_MIPS_GIC_IPI) += smp-gic.o
obj-$(CONFIG_MIPS_SPRAM) += spram.o
obj-$(CONFIG_MIPS_VPE_LOADER) += vpe.o
diff --git a/arch/mips/kernel/r2300_fpu.S b/arch/mips/kernel/r2300_fpu.S
index 5ce3b746cedc0..b4ac6374a38f2 100644
--- a/arch/mips/kernel/r2300_fpu.S
+++ b/arch/mips/kernel/r2300_fpu.S
@@ -125,7 +125,7 @@ LEAF(_restore_fp_context)
END(_restore_fp_context)
.set reorder
- .type fault@function
+ .type fault, @function
.ent fault
fault: li v0, -EFAULT
jr ra
diff --git a/arch/mips/kernel/r4k_fpu.S b/arch/mips/kernel/r4k_fpu.S
index f09546ee2cdc9..17732f876eff1 100644
--- a/arch/mips/kernel/r4k_fpu.S
+++ b/arch/mips/kernel/r4k_fpu.S
@@ -358,7 +358,7 @@ LEAF(_restore_msa_all_upper)
.set reorder
- .type fault@function
+ .type fault, @function
.ent fault
fault: li v0, -EFAULT # failure
jr ra
diff --git a/arch/mips/kernel/setup.c b/arch/mips/kernel/setup.c
index 5fdaf8bdcd2eb..4f607341a7938 100644
--- a/arch/mips/kernel/setup.c
+++ b/arch/mips/kernel/setup.c
@@ -732,21 +732,23 @@ static void __init resource_init(void)
end = HIGHMEM_START - 1;
res = alloc_bootmem(sizeof(struct resource));
+
+ res->start = start;
+ res->end = end;
+ res->flags = IORESOURCE_MEM | IORESOURCE_BUSY;
+
switch (boot_mem_map.map[i].type) {
case BOOT_MEM_RAM:
case BOOT_MEM_INIT_RAM:
case BOOT_MEM_ROM_DATA:
res->name = "System RAM";
+ res->flags |= IORESOURCE_SYSRAM;
break;
case BOOT_MEM_RESERVED:
default:
res->name = "reserved";
}
- res->start = start;
- res->end = end;
-
- res->flags = IORESOURCE_MEM | IORESOURCE_BUSY;
request_resource(&iomem_resource, res);
/*
diff --git a/arch/mips/kernel/smp-cmp.c b/arch/mips/kernel/smp-cmp.c
index d5e0f949dc480..76923349b4fe1 100644
--- a/arch/mips/kernel/smp-cmp.c
+++ b/arch/mips/kernel/smp-cmp.c
@@ -149,8 +149,8 @@ void __init cmp_prepare_cpus(unsigned int max_cpus)
}
struct plat_smp_ops cmp_smp_ops = {
- .send_ipi_single = gic_send_ipi_single,
- .send_ipi_mask = gic_send_ipi_mask,
+ .send_ipi_single = mips_smp_send_ipi_single,
+ .send_ipi_mask = mips_smp_send_ipi_mask,
.init_secondary = cmp_init_secondary,
.smp_finish = cmp_smp_finish,
.boot_secondary = cmp_boot_secondary,
diff --git a/arch/mips/kernel/smp-cps.c b/arch/mips/kernel/smp-cps.c
index 2ad4e4c96d61c..253e1409338c1 100644
--- a/arch/mips/kernel/smp-cps.c
+++ b/arch/mips/kernel/smp-cps.c
@@ -472,8 +472,8 @@ static struct plat_smp_ops cps_smp_ops = {
.boot_secondary = cps_boot_secondary,
.init_secondary = cps_init_secondary,
.smp_finish = cps_smp_finish,
- .send_ipi_single = gic_send_ipi_single,
- .send_ipi_mask = gic_send_ipi_mask,
+ .send_ipi_single = mips_smp_send_ipi_single,
+ .send_ipi_mask = mips_smp_send_ipi_mask,
#ifdef CONFIG_HOTPLUG_CPU
.cpu_disable = cps_cpu_disable,
.cpu_die = cps_cpu_die,
diff --git a/arch/mips/kernel/smp-mt.c b/arch/mips/kernel/smp-mt.c
index 86311a164ef12..4f9570a57e8d8 100644
--- a/arch/mips/kernel/smp-mt.c
+++ b/arch/mips/kernel/smp-mt.c
@@ -121,7 +121,7 @@ static void vsmp_send_ipi_single(int cpu, unsigned int action)
#ifdef CONFIG_MIPS_GIC
if (gic_present) {
- gic_send_ipi_single(cpu, action);
+ mips_smp_send_ipi_single(cpu, action);
return;
}
#endif
diff --git a/arch/mips/kernel/smp.c b/arch/mips/kernel/smp.c
index bd4385a8e6e86..37708d9af6381 100644
--- a/arch/mips/kernel/smp.c
+++ b/arch/mips/kernel/smp.c
@@ -33,12 +33,16 @@
#include
#include
#include
+#include
+#include
+#include
#include
#include
#include
#include
#include
+#include
#include
#include
#include
@@ -79,6 +83,11 @@ static cpumask_t cpu_core_setup_map;
cpumask_t cpu_coherent_mask;
+#ifdef CONFIG_GENERIC_IRQ_IPI
+static struct irq_desc *call_desc;
+static struct irq_desc *sched_desc;
+#endif
+
static inline void set_cpu_sibling_map(int cpu)
{
int i;
@@ -121,6 +130,7 @@ static inline void calculate_cpu_foreign_map(void)
cpumask_t temp_foreign_map;
/* Re-calculate the mask */
+ cpumask_clear(&temp_foreign_map);
for_each_online_cpu(i) {
core_present = 0;
for_each_cpu(k, &temp_foreign_map)
@@ -145,6 +155,133 @@ void register_smp_ops(struct plat_smp_ops *ops)
mp_ops = ops;
}
+#ifdef CONFIG_GENERIC_IRQ_IPI
+void mips_smp_send_ipi_single(int cpu, unsigned int action)
+{
+ mips_smp_send_ipi_mask(cpumask_of(cpu), action);
+}
+
+void mips_smp_send_ipi_mask(const struct cpumask *mask, unsigned int action)
+{
+ unsigned long flags;
+ unsigned int core;
+ int cpu;
+
+ local_irq_save(flags);
+
+ switch (action) {
+ case SMP_CALL_FUNCTION:
+ __ipi_send_mask(call_desc, mask);
+ break;
+
+ case SMP_RESCHEDULE_YOURSELF:
+ __ipi_send_mask(sched_desc, mask);
+ break;
+
+ default:
+ BUG();
+ }
+
+ if (mips_cpc_present()) {
+ for_each_cpu(cpu, mask) {
+ core = cpu_data[cpu].core;
+
+ if (core == current_cpu_data.core)
+ continue;
+
+ while (!cpumask_test_cpu(cpu, &cpu_coherent_mask)) {
+ mips_cpc_lock_other(core);
+ write_cpc_co_cmd(CPC_Cx_CMD_PWRUP);
+ mips_cpc_unlock_other();
+ }
+ }
+ }
+
+ local_irq_restore(flags);
+}
+
+
+static irqreturn_t ipi_resched_interrupt(int irq, void *dev_id)
+{
+ scheduler_ipi();
+
+ return IRQ_HANDLED;
+}
+
+static irqreturn_t ipi_call_interrupt(int irq, void *dev_id)
+{
+ generic_smp_call_function_interrupt();
+
+ return IRQ_HANDLED;
+}
+
+static struct irqaction irq_resched = {
+ .handler = ipi_resched_interrupt,
+ .flags = IRQF_PERCPU,
+ .name = "IPI resched"
+};
+
+static struct irqaction irq_call = {
+ .handler = ipi_call_interrupt,
+ .flags = IRQF_PERCPU,
+ .name = "IPI call"
+};
+
+static __init void smp_ipi_init_one(unsigned int virq,
+ struct irqaction *action)
+{
+ int ret;
+
+ irq_set_handler(virq, handle_percpu_irq);
+ ret = setup_irq(virq, action);
+ BUG_ON(ret);
+}
+
+static int __init mips_smp_ipi_init(void)
+{
+ unsigned int call_virq, sched_virq;
+ struct irq_domain *ipidomain;
+ struct device_node *node;
+
+ node = of_irq_find_parent(of_root);
+ ipidomain = irq_find_matching_host(node, DOMAIN_BUS_IPI);
+
+ /*
+ * Some platforms have half DT setup. So if we found irq node but
+ * didn't find an ipidomain, try to search for one that is not in the
+ * DT.
+ */
+ if (node && !ipidomain)
+ ipidomain = irq_find_matching_host(NULL, DOMAIN_BUS_IPI);
+
+ BUG_ON(!ipidomain);
+
+ call_virq = irq_reserve_ipi(ipidomain, cpu_possible_mask);
+ BUG_ON(!call_virq);
+
+ sched_virq = irq_reserve_ipi(ipidomain, cpu_possible_mask);
+ BUG_ON(!sched_virq);
+
+ if (irq_domain_is_ipi_per_cpu(ipidomain)) {
+ int cpu;
+
+ for_each_cpu(cpu, cpu_possible_mask) {
+ smp_ipi_init_one(call_virq + cpu, &irq_call);
+ smp_ipi_init_one(sched_virq + cpu, &irq_resched);
+ }
+ } else {
+ smp_ipi_init_one(call_virq, &irq_call);
+ smp_ipi_init_one(sched_virq, &irq_resched);
+ }
+
+ call_desc = irq_to_desc(call_virq);
+ sched_desc = irq_to_desc(sched_virq);
+
+ return 0;
+}
+early_initcall(mips_smp_ipi_init);
+#endif
+
/*
* First C code run on the secondary CPUs after being started up by
* the master.
@@ -191,7 +328,7 @@ asmlinkage void start_secondary(void)
WARN_ON_ONCE(!irqs_disabled());
mp_ops->smp_finish();
- cpu_startup_entry(CPUHP_ONLINE);
+ cpu_startup_entry(CPUHP_AP_ONLINE_IDLE);
}
static void stop_this_cpu(void *dummy)
diff --git a/arch/mips/kernel/traps.c b/arch/mips/kernel/traps.c
index ae790c575d4fe..bf14da9f3e33b 100644
--- a/arch/mips/kernel/traps.c
+++ b/arch/mips/kernel/traps.c
@@ -690,15 +690,15 @@ static int simulate_sync(struct pt_regs *regs, unsigned int opcode)
asmlinkage void do_ov(struct pt_regs *regs)
{
enum ctx_state prev_state;
- siginfo_t info;
+ siginfo_t info = {
+ .si_signo = SIGFPE,
+ .si_code = FPE_INTOVF,
+ .si_addr = (void __user *)regs->cp0_epc,
+ };
prev_state = exception_enter();
die_if_kernel("Integer overflow", regs);
- info.si_code = FPE_INTOVF;
- info.si_signo = SIGFPE;
- info.si_errno = 0;
- info.si_addr = (void __user *) regs->cp0_epc;
force_sig_info(SIGFPE, &info, current);
exception_exit(prev_state);
}
@@ -874,7 +874,7 @@ asmlinkage void do_fpe(struct pt_regs *regs, unsigned long fcr31)
void do_trap_or_bp(struct pt_regs *regs, unsigned int code,
const char *str)
{
- siginfo_t info;
+ siginfo_t info = { 0 };
char b[40];
#ifdef CONFIG_KGDB_LOW_LEVEL_TRAP
@@ -903,7 +903,6 @@ void do_trap_or_bp(struct pt_regs *regs, unsigned int code,
else
info.si_code = FPE_INTOVF;
info.si_signo = SIGFPE;
- info.si_errno = 0;
info.si_addr = (void __user *) regs->cp0_epc;
force_sig_info(SIGFPE, &info, current);
break;
diff --git a/arch/mips/kvm/mips.c b/arch/mips/kvm/mips.c
index 8bc3977576e6a..70ef1a43c1148 100644
--- a/arch/mips/kvm/mips.c
+++ b/arch/mips/kvm/mips.c
@@ -445,8 +445,8 @@ int kvm_vcpu_ioctl_interrupt(struct kvm_vcpu *vcpu,
dvcpu->arch.wait = 0;
- if (waitqueue_active(&dvcpu->wq))
- wake_up_interruptible(&dvcpu->wq);
+ if (swait_active(&dvcpu->wq))
+ swake_up(&dvcpu->wq);
return 0;
}
@@ -702,7 +702,7 @@ static int kvm_mips_get_reg(struct kvm_vcpu *vcpu,
} else if ((reg->id & KVM_REG_SIZE_MASK) == KVM_REG_SIZE_U128) {
void __user *uaddr = (void __user *)(long)reg->addr;
- return copy_to_user(uaddr, vs, 16);
+ return copy_to_user(uaddr, vs, 16) ? -EFAULT : 0;
} else {
return -EINVAL;
}
@@ -732,7 +732,7 @@ static int kvm_mips_set_reg(struct kvm_vcpu *vcpu,
} else if ((reg->id & KVM_REG_SIZE_MASK) == KVM_REG_SIZE_U128) {
void __user *uaddr = (void __user *)(long)reg->addr;
- return copy_from_user(vs, uaddr, 16);
+ return copy_from_user(vs, uaddr, 16) ? -EFAULT : 0;
} else {
return -EINVAL;
}
@@ -1174,8 +1174,8 @@ static void kvm_mips_comparecount_func(unsigned long data)
kvm_mips_callbacks->queue_timer_int(vcpu);
vcpu->arch.wait = 0;
- if (waitqueue_active(&vcpu->wq))
- wake_up_interruptible(&vcpu->wq);
+ if (swait_active(&vcpu->wq))
+ swake_up(&vcpu->wq);
}
/* low level hrtimer wake routine */
diff --git a/arch/mips/mm/sc-mips.c b/arch/mips/mm/sc-mips.c
index 249647578e587..91dec32c77b72 100644
--- a/arch/mips/mm/sc-mips.c
+++ b/arch/mips/mm/sc-mips.c
@@ -164,11 +164,13 @@ static int __init mips_sc_probe_cm3(void)
sets = cfg & CM_GCR_L2_CONFIG_SET_SIZE_MSK;
sets >>= CM_GCR_L2_CONFIG_SET_SIZE_SHF;
- c->scache.sets = 64 << sets;
+ if (sets)
+ c->scache.sets = 64 << sets;
line_sz = cfg & CM_GCR_L2_CONFIG_LINE_SIZE_MSK;
line_sz >>= CM_GCR_L2_CONFIG_LINE_SIZE_SHF;
- c->scache.linesz = 2 << line_sz;
+ if (line_sz)
+ c->scache.linesz = 2 << line_sz;
assoc = cfg & CM_GCR_L2_CONFIG_ASSOC_MSK;
assoc >>= CM_GCR_L2_CONFIG_ASSOC_SHF;
@@ -176,9 +178,12 @@ static int __init mips_sc_probe_cm3(void)
c->scache.waysize = c->scache.sets * c->scache.linesz;
c->scache.waybit = __ffs(c->scache.waysize);
- c->scache.flags &= ~MIPS_CACHE_NOT_PRESENT;
+ if (c->scache.linesz) {
+ c->scache.flags &= ~MIPS_CACHE_NOT_PRESENT;
+ return 1;
+ }
- return 1;
+ return 0;
}
static inline int __init mips_sc_probe(void)
diff --git a/arch/mn10300/kernel/smp.c b/arch/mn10300/kernel/smp.c
index f984193718b10..426173c4b0b90 100644
--- a/arch/mn10300/kernel/smp.c
+++ b/arch/mn10300/kernel/smp.c
@@ -675,7 +675,7 @@ int __init start_secondary(void *unused)
#ifdef CONFIG_GENERIC_CLOCKEVENTS
init_clockevents();
#endif
- cpu_startup_entry(CPUHP_ONLINE);
+ cpu_startup_entry(CPUHP_AP_ONLINE_IDLE);
return 0;
}
diff --git a/arch/parisc/include/asm/cache.h b/arch/parisc/include/asm/cache.h
index 3d0e17bcc8e90..df0f52bd18b45 100644
--- a/arch/parisc/include/asm/cache.h
+++ b/arch/parisc/include/asm/cache.h
@@ -22,6 +22,9 @@
#define __read_mostly __attribute__((__section__(".data..read_mostly")))
+/* Read-only memory is marked before mark_rodata_ro() is called. */
+#define __ro_after_init __read_mostly
+
void parisc_cache_init(void); /* initializes cache-flushing */
void disable_sr_hashing_asm(int); /* low level support for above */
void disable_sr_hashing(void); /* turns off space register hashing */
diff --git a/arch/parisc/include/asm/cacheflush.h b/arch/parisc/include/asm/cacheflush.h
index 845272ce9cc58..7bd69bd43a018 100644
--- a/arch/parisc/include/asm/cacheflush.h
+++ b/arch/parisc/include/asm/cacheflush.h
@@ -121,10 +121,6 @@ flush_anon_page(struct vm_area_struct *vma, struct page *page, unsigned long vma
}
}
-#ifdef CONFIG_DEBUG_RODATA
-void mark_rodata_ro(void);
-#endif
-
#include
#define ARCH_HAS_KMAP
diff --git a/arch/parisc/include/asm/floppy.h b/arch/parisc/include/asm/floppy.h
index f84ff12574b73..6d8276cd25cad 100644
--- a/arch/parisc/include/asm/floppy.h
+++ b/arch/parisc/include/asm/floppy.h
@@ -33,7 +33,7 @@
* floppy accesses go through the track buffer.
*/
#define _CROSS_64KB(a,s,vdma) \
-(!vdma && ((unsigned long)(a)/K_64 != ((unsigned long)(a) + (s) - 1) / K_64))
+(!(vdma) && ((unsigned long)(a)/K_64 != ((unsigned long)(a) + (s) - 1) / K_64))
#define CROSS_64KB(a,s) _CROSS_64KB(a,s,use_virtual_dma & 1)
diff --git a/arch/parisc/include/uapi/asm/unistd.h b/arch/parisc/include/uapi/asm/unistd.h
index 35bdccbb20362..b75039f921164 100644
--- a/arch/parisc/include/uapi/asm/unistd.h
+++ b/arch/parisc/include/uapi/asm/unistd.h
@@ -361,8 +361,9 @@
#define __NR_membarrier (__NR_Linux + 343)
#define __NR_userfaultfd (__NR_Linux + 344)
#define __NR_mlock2 (__NR_Linux + 345)
+#define __NR_copy_file_range (__NR_Linux + 346)
-#define __NR_Linux_syscalls (__NR_mlock2 + 1)
+#define __NR_Linux_syscalls (__NR_copy_file_range + 1)
#define __IGNORE_select /* newselect */
diff --git a/arch/parisc/kernel/ptrace.c b/arch/parisc/kernel/ptrace.c
index 9585c81f755fc..ce0b2b4075c70 100644
--- a/arch/parisc/kernel/ptrace.c
+++ b/arch/parisc/kernel/ptrace.c
@@ -269,14 +269,19 @@ long compat_arch_ptrace(struct task_struct *child, compat_long_t request,
long do_syscall_trace_enter(struct pt_regs *regs)
{
- long ret = 0;
-
/* Do the secure computing check first. */
secure_computing_strict(regs->gr[20]);
if (test_thread_flag(TIF_SYSCALL_TRACE) &&
- tracehook_report_syscall_entry(regs))
- ret = -1L;
+ tracehook_report_syscall_entry(regs)) {
+ /*
+ * Tracing decided this syscall should not happen or the
+ * debugger stored an invalid system call number. Skip
+ * the system call and the system call restart handling.
+ */
+ regs->gr[20] = -1UL;
+ goto out;
+ }
#ifdef CONFIG_64BIT
if (!is_compat_task())
@@ -290,7 +295,8 @@ long do_syscall_trace_enter(struct pt_regs *regs)
regs->gr[24] & 0xffffffff,
regs->gr[23] & 0xffffffff);
- return ret ? : regs->gr[20];
+out:
+ return regs->gr[20];
}
void do_syscall_trace_exit(struct pt_regs *regs)
diff --git a/arch/parisc/kernel/smp.c b/arch/parisc/kernel/smp.c
index 52e85973a283c..c2a9cc55a62f6 100644
--- a/arch/parisc/kernel/smp.c
+++ b/arch/parisc/kernel/smp.c
@@ -305,7 +305,7 @@ void __init smp_callin(void)
local_irq_enable(); /* Interrupts have been off until now */
- cpu_startup_entry(CPUHP_ONLINE);
+ cpu_startup_entry(CPUHP_AP_ONLINE_IDLE);
/* NOTREACHED */
panic("smp_callin() AAAAaaaaahhhh....\n");
diff --git a/arch/parisc/kernel/syscall.S b/arch/parisc/kernel/syscall.S
index 3fbd7252a4b27..fbafa0d0e2bf8 100644
--- a/arch/parisc/kernel/syscall.S
+++ b/arch/parisc/kernel/syscall.S
@@ -343,7 +343,7 @@ tracesys_next:
#endif
comiclr,>>= __NR_Linux_syscalls, %r20, %r0
- b,n .Lsyscall_nosys
+ b,n .Ltracesys_nosys
LDREGX %r20(%r19), %r19
@@ -359,6 +359,9 @@ tracesys_next:
be 0(%sr7,%r19)
ldo R%tracesys_exit(%r2),%r2
+.Ltracesys_nosys:
+ ldo -ENOSYS(%r0),%r28 /* set errno */
+
/* Do *not* call this function on the gateway page, because it
makes a direct call to syscall_trace. */
diff --git a/arch/parisc/kernel/syscall_table.S b/arch/parisc/kernel/syscall_table.S
index d4ffcfbc98851..585d50fc75c0f 100644
--- a/arch/parisc/kernel/syscall_table.S
+++ b/arch/parisc/kernel/syscall_table.S
@@ -441,6 +441,7 @@
ENTRY_SAME(membarrier)
ENTRY_SAME(userfaultfd)
ENTRY_SAME(mlock2) /* 345 */
+ ENTRY_SAME(copy_file_range)
.ifne (. - 90b) - (__NR_Linux_syscalls * (91b - 90b))
diff --git a/arch/parisc/mm/init.c b/arch/parisc/mm/init.c
index 1b366c4776872..3c07d6b968772 100644
--- a/arch/parisc/mm/init.c
+++ b/arch/parisc/mm/init.c
@@ -55,12 +55,12 @@ signed char pfnnid_map[PFNNID_MAP_MAX] __read_mostly;
static struct resource data_resource = {
.name = "Kernel data",
- .flags = IORESOURCE_BUSY | IORESOURCE_MEM,
+ .flags = IORESOURCE_BUSY | IORESOURCE_SYSTEM_RAM,
};
static struct resource code_resource = {
.name = "Kernel code",
- .flags = IORESOURCE_BUSY | IORESOURCE_MEM,
+ .flags = IORESOURCE_BUSY | IORESOURCE_SYSTEM_RAM,
};
static struct resource pdcdata_resource = {
@@ -201,7 +201,7 @@ static void __init setup_bootmem(void)
res->name = "System RAM";
res->start = pmem_ranges[i].start_pfn << PAGE_SHIFT;
res->end = res->start + (pmem_ranges[i].pages << PAGE_SHIFT)-1;
- res->flags = IORESOURCE_MEM | IORESOURCE_BUSY;
+ res->flags = IORESOURCE_SYSTEM_RAM | IORESOURCE_BUSY;
request_resource(&iomem_resource, res);
}
diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h
index 9d08d8cbed1a1..c98afa538b3ae 100644
--- a/arch/powerpc/include/asm/kvm_host.h
+++ b/arch/powerpc/include/asm/kvm_host.h
@@ -289,7 +289,7 @@ struct kvmppc_vcore {
struct list_head runnable_threads;
struct list_head preempt_list;
spinlock_t lock;
- wait_queue_head_t wq;
+ struct swait_queue_head wq;
spinlock_t stoltb_lock; /* protects stolen_tb and preempt_tb */
u64 stolen_tb;
u64 preempt_tb;
@@ -629,7 +629,7 @@ struct kvm_vcpu_arch {
u8 prodded;
u32 last_inst;
- wait_queue_head_t *wqp;
+ struct swait_queue_head *wqp;
struct kvmppc_vcore *vcore;
int ret;
int trap;
diff --git a/arch/powerpc/kernel/hw_breakpoint.c b/arch/powerpc/kernel/hw_breakpoint.c
index 05e804cdecaa9..aec9a1b1d25bc 100644
--- a/arch/powerpc/kernel/hw_breakpoint.c
+++ b/arch/powerpc/kernel/hw_breakpoint.c
@@ -109,8 +109,9 @@ void arch_unregister_hw_breakpoint(struct perf_event *bp)
* If the breakpoint is unregistered between a hw_breakpoint_handler()
* and the single_step_dabr_instruction(), then cleanup the breakpoint
* restoration variables to prevent dangling pointers.
+ * FIXME, this should not be using bp->ctx at all! Sayeth peterz.
*/
- if (bp->ctx && bp->ctx->task)
+ if (bp->ctx && bp->ctx->task && bp->ctx->task != ((void *)-1L))
bp->ctx->task->thread.last_hit_ubp = NULL;
}
diff --git a/arch/powerpc/kernel/setup_32.c b/arch/powerpc/kernel/setup_32.c
index ad8c9db612372..d544fa3117576 100644
--- a/arch/powerpc/kernel/setup_32.c
+++ b/arch/powerpc/kernel/setup_32.c
@@ -114,8 +114,6 @@ extern unsigned int memset_nocache_branch; /* Insn to be replaced by NOP */
notrace void __init machine_init(u64 dt_ptr)
{
- lockdep_init();
-
/* Enable early debugging if any specified (see udbg.h) */
udbg_early_init();
diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c
index 5c03a6a9b0542..f98be8383a399 100644
--- a/arch/powerpc/kernel/setup_64.c
+++ b/arch/powerpc/kernel/setup_64.c
@@ -255,9 +255,6 @@ void __init early_setup(unsigned long dt_ptr)
setup_paca(&boot_paca);
fixup_boot_paca();
- /* Initialize lockdep early or else spinlocks will blow */
- lockdep_init();
-
/* -------- printk is now safe to use ------- */
/* Enable early debugging if any specified (see udbg.h) */
diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c
index ec9ec2058d2d3..cc13d4c832916 100644
--- a/arch/powerpc/kernel/smp.c
+++ b/arch/powerpc/kernel/smp.c
@@ -727,7 +727,7 @@ void start_secondary(void *unused)
local_irq_enable();
- cpu_startup_entry(CPUHP_ONLINE);
+ cpu_startup_entry(CPUHP_AP_ONLINE_IDLE);
BUG();
}
diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
index baeddb06811d7..f1187bb6dd4d7 100644
--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -114,11 +114,11 @@ static bool kvmppc_ipi_thread(int cpu)
static void kvmppc_fast_vcpu_kick_hv(struct kvm_vcpu *vcpu)
{
int cpu;
- wait_queue_head_t *wqp;
+ struct swait_queue_head *wqp;
wqp = kvm_arch_vcpu_wq(vcpu);
- if (waitqueue_active(wqp)) {
- wake_up_interruptible(wqp);
+ if (swait_active(wqp)) {
+ swake_up(wqp);
++vcpu->stat.halt_wakeup;
}
@@ -701,8 +701,8 @@ int kvmppc_pseries_do_hcall(struct kvm_vcpu *vcpu)
tvcpu->arch.prodded = 1;
smp_mb();
if (vcpu->arch.ceded) {
- if (waitqueue_active(&vcpu->wq)) {
- wake_up_interruptible(&vcpu->wq);
+ if (swait_active(&vcpu->wq)) {
+ swake_up(&vcpu->wq);
vcpu->stat.halt_wakeup++;
}
}
@@ -1459,7 +1459,7 @@ static struct kvmppc_vcore *kvmppc_vcore_create(struct kvm *kvm, int core)
INIT_LIST_HEAD(&vcore->runnable_threads);
spin_lock_init(&vcore->lock);
spin_lock_init(&vcore->stoltb_lock);
- init_waitqueue_head(&vcore->wq);
+ init_swait_queue_head(&vcore->wq);
vcore->preempt_tb = TB_NIL;
vcore->lpcr = kvm->arch.lpcr;
vcore->first_vcpuid = core * threads_per_subcore;
@@ -2531,10 +2531,9 @@ static void kvmppc_vcore_blocked(struct kvmppc_vcore *vc)
{
struct kvm_vcpu *vcpu;
int do_sleep = 1;
+ DECLARE_SWAITQUEUE(wait);
- DEFINE_WAIT(wait);
-
- prepare_to_wait(&vc->wq, &wait, TASK_INTERRUPTIBLE);
+ prepare_to_swait(&vc->wq, &wait, TASK_INTERRUPTIBLE);
/*
* Check one last time for pending exceptions and ceded state after
@@ -2548,7 +2547,7 @@ static void kvmppc_vcore_blocked(struct kvmppc_vcore *vc)
}
if (!do_sleep) {
- finish_wait(&vc->wq, &wait);
+ finish_swait(&vc->wq, &wait);
return;
}
@@ -2556,7 +2555,7 @@ static void kvmppc_vcore_blocked(struct kvmppc_vcore *vc)
trace_kvmppc_vcore_blocked(vc, 0);
spin_unlock(&vc->lock);
schedule();
- finish_wait(&vc->wq, &wait);
+ finish_swait(&vc->wq, &wait);
spin_lock(&vc->lock);
vc->vcore_state = VCORE_INACTIVE;
trace_kvmppc_vcore_blocked(vc, 1);
@@ -2612,7 +2611,7 @@ static int kvmppc_run_vcpu(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
kvmppc_start_thread(vcpu, vc);
trace_kvm_guest_enter(vcpu);
} else if (vc->vcore_state == VCORE_SLEEPING) {
- wake_up(&vc->wq);
+ swake_up(&vc->wq);
}
}
diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
index 6ee26de9a1ded..25ae2c9913c39 100644
--- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S
+++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
@@ -1370,6 +1370,20 @@ END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_207S)
std r6, VCPU_ACOP(r9)
stw r7, VCPU_GUEST_PID(r9)
std r8, VCPU_WORT(r9)
+ /*
+ * Restore various registers to 0, where non-zero values
+ * set by the guest could disrupt the host.
+ */
+ li r0, 0
+ mtspr SPRN_IAMR, r0
+ mtspr SPRN_CIABR, r0
+ mtspr SPRN_DAWRX, r0
+ mtspr SPRN_TCSCR, r0
+ mtspr SPRN_WORT, r0
+ /* Set MMCRS to 1<<31 to freeze and disable the SPMC counters */
+ li r0, 1
+ sldi r0, r0, 31
+ mtspr SPRN_MMCRS, r0
8:
/* Save and reset AMR and UAMOR before turning on the MMU */
diff --git a/arch/powerpc/mm/hugetlbpage-book3e.c b/arch/powerpc/mm/hugetlbpage-book3e.c
index 7e6d0880813fe..83a8be791e064 100644
--- a/arch/powerpc/mm/hugetlbpage-book3e.c
+++ b/arch/powerpc/mm/hugetlbpage-book3e.c
@@ -8,6 +8,8 @@
#include
#include
+#include
+
#ifdef CONFIG_PPC_FSL_BOOK3E
#ifdef CONFIG_PPC64
static inline int tlb1_next(void)
@@ -60,6 +62,14 @@ static inline void book3e_tlb_lock(void)
unsigned long tmp;
int token = smp_processor_id() + 1;
+ /*
+ * Besides being unnecessary in the absence of SMT, this
+ * check prevents trying to do lbarx/stbcx. on e5500 which
+ * doesn't implement either feature.
+ */
+ if (!cpu_has_feature(CPU_FTR_SMT))
+ return;
+
asm volatile("1: lbarx %0, 0, %1;"
"cmpwi %0, 0;"
"bne 2f;"
@@ -80,6 +90,9 @@ static inline void book3e_tlb_unlock(void)
{
struct paca_struct *paca = get_paca();
+ if (!cpu_has_feature(CPU_FTR_SMT))
+ return;
+
isync();
paca->tcd_ptr->lock = 0;
}
diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c
index d0f0a514b04ed..f078a1f94fc26 100644
--- a/arch/powerpc/mm/mem.c
+++ b/arch/powerpc/mm/mem.c
@@ -541,7 +541,7 @@ static int __init add_system_ram_resources(void)
res->name = "System RAM";
res->start = base;
res->end = base + size - 1;
- res->flags = IORESOURCE_MEM | IORESOURCE_BUSY;
+ res->flags = IORESOURCE_SYSTEM_RAM | IORESOURCE_BUSY;
WARN_ON(request_resource(&iomem_resource, res) < 0);
}
}
diff --git a/arch/s390/include/asm/kvm_host.h b/arch/s390/include/asm/kvm_host.h
index 8959ebb6d2c9e..b0c8ad0799c7f 100644
--- a/arch/s390/include/asm/kvm_host.h
+++ b/arch/s390/include/asm/kvm_host.h
@@ -467,7 +467,7 @@ struct kvm_s390_irq_payload {
struct kvm_s390_local_interrupt {
spinlock_t lock;
struct kvm_s390_float_interrupt *float_int;
- wait_queue_head_t *wq;
+ struct swait_queue_head *wq;
atomic_t *cpuflags;
DECLARE_BITMAP(sigp_emerg_pending, KVM_MAX_VCPUS);
struct kvm_s390_irq_payload irq;
diff --git a/arch/s390/include/asm/mmu_context.h b/arch/s390/include/asm/mmu_context.h
index fb1b93ea3e3fe..e485817f7b1a9 100644
--- a/arch/s390/include/asm/mmu_context.h
+++ b/arch/s390/include/asm/mmu_context.h
@@ -15,17 +15,25 @@
static inline int init_new_context(struct task_struct *tsk,
struct mm_struct *mm)
{
+ spin_lock_init(&mm->context.list_lock);
+ INIT_LIST_HEAD(&mm->context.pgtable_list);
+ INIT_LIST_HEAD(&mm->context.gmap_list);
cpumask_clear(&mm->context.cpu_attach_mask);
atomic_set(&mm->context.attach_count, 0);
mm->context.flush_mm = 0;
- mm->context.asce_bits = _ASCE_TABLE_LENGTH | _ASCE_USER_BITS;
- mm->context.asce_bits |= _ASCE_TYPE_REGION3;
#ifdef CONFIG_PGSTE
mm->context.alloc_pgste = page_table_allocate_pgste;
mm->context.has_pgste = 0;
mm->context.use_skey = 0;
#endif
- mm->context.asce_limit = STACK_TOP_MAX;
+ if (mm->context.asce_limit == 0) {
+ /* context created by exec, set asce limit to 4TB */
+ mm->context.asce_bits = _ASCE_TABLE_LENGTH |
+ _ASCE_USER_BITS | _ASCE_TYPE_REGION3;
+ mm->context.asce_limit = STACK_TOP_MAX;
+ } else if (mm->context.asce_limit == (1UL << 31)) {
+ mm_inc_nr_pmds(mm);
+ }
crst_table_init((unsigned long *) mm->pgd, pgd_entry_type(mm));
return 0;
}
@@ -111,8 +119,6 @@ static inline void activate_mm(struct mm_struct *prev,
static inline void arch_dup_mmap(struct mm_struct *oldmm,
struct mm_struct *mm)
{
- if (oldmm->context.asce_limit < mm->context.asce_limit)
- crst_table_downgrade(mm, oldmm->context.asce_limit);
}
static inline void arch_exit_mmap(struct mm_struct *mm)
diff --git a/arch/s390/include/asm/pgalloc.h b/arch/s390/include/asm/pgalloc.h
index 7b7858f158b45..d7cc79fb61911 100644
--- a/arch/s390/include/asm/pgalloc.h
+++ b/arch/s390/include/asm/pgalloc.h
@@ -100,12 +100,26 @@ static inline void pud_populate(struct mm_struct *mm, pud_t *pud, pmd_t *pmd)
static inline pgd_t *pgd_alloc(struct mm_struct *mm)
{
- spin_lock_init(&mm->context.list_lock);
- INIT_LIST_HEAD(&mm->context.pgtable_list);
- INIT_LIST_HEAD(&mm->context.gmap_list);
- return (pgd_t *) crst_table_alloc(mm);
+ unsigned long *table = crst_table_alloc(mm);
+
+ if (!table)
+ return NULL;
+ if (mm->context.asce_limit == (1UL << 31)) {
+ /* Forking a compat process with 2 page table levels */
+ if (!pgtable_pmd_page_ctor(virt_to_page(table))) {
+ crst_table_free(mm, table);
+ return NULL;
+ }
+ }
+ return (pgd_t *) table;
+}
+
+static inline void pgd_free(struct mm_struct *mm, pgd_t *pgd)
+{
+ if (mm->context.asce_limit == (1UL << 31))
+ pgtable_pmd_page_dtor(virt_to_page(pgd));
+ crst_table_free(mm, (unsigned long *) pgd);
}
-#define pgd_free(mm, pgd) crst_table_free(mm, (unsigned long *) pgd)
static inline void pmd_populate(struct mm_struct *mm,
pmd_t *pmd, pgtable_t pte)
diff --git a/arch/s390/kernel/early.c b/arch/s390/kernel/early.c
index c55576bbaa1f7..a0684de5a93b9 100644
--- a/arch/s390/kernel/early.c
+++ b/arch/s390/kernel/early.c
@@ -448,7 +448,6 @@ void __init startup_init(void)
rescue_initrd();
clear_bss_section();
init_kernel_storage_key();
- lockdep_init();
lockdep_off();
setup_lowcore_early();
setup_facility_list();
diff --git a/arch/s390/kernel/head64.S b/arch/s390/kernel/head64.S
index c5febe84eba63..03c2b469c4725 100644
--- a/arch/s390/kernel/head64.S
+++ b/arch/s390/kernel/head64.S
@@ -16,7 +16,7 @@
__HEAD
ENTRY(startup_continue)
- tm __LC_STFLE_FAC_LIST+6,0x80 # LPP available ?
+ tm __LC_STFLE_FAC_LIST+5,0x80 # LPP available ?
jz 0f
xc __LC_LPP+1(7,0),__LC_LPP+1 # clear lpp and current_pid
mvi __LC_LPP,0x80 # and set LPP_MAGIC
diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c
index 9220db5c996aa..cedb0198675f7 100644
--- a/arch/s390/kernel/setup.c
+++ b/arch/s390/kernel/setup.c
@@ -374,17 +374,17 @@ static void __init setup_lowcore(void)
static struct resource code_resource = {
.name = "Kernel code",
- .flags = IORESOURCE_BUSY | IORESOURCE_MEM,
+ .flags = IORESOURCE_BUSY | IORESOURCE_SYSTEM_RAM,
};
static struct resource data_resource = {
.name = "Kernel data",
- .flags = IORESOURCE_BUSY | IORESOURCE_MEM,
+ .flags = IORESOURCE_BUSY | IORESOURCE_SYSTEM_RAM,
};
static struct resource bss_resource = {
.name = "Kernel bss",
- .flags = IORESOURCE_BUSY | IORESOURCE_MEM,
+ .flags = IORESOURCE_BUSY | IORESOURCE_SYSTEM_RAM,
};
static struct resource __initdata *standard_resources[] = {
@@ -408,7 +408,7 @@ static void __init setup_resources(void)
for_each_memblock(memory, reg) {
res = alloc_bootmem_low(sizeof(*res));
- res->flags = IORESOURCE_BUSY | IORESOURCE_MEM;
+ res->flags = IORESOURCE_BUSY | IORESOURCE_SYSTEM_RAM;
res->name = "System RAM";
res->start = reg->base;
diff --git a/arch/s390/kernel/smp.c b/arch/s390/kernel/smp.c
index 3c65a8eae34d3..40a6b4f9c36ce 100644
--- a/arch/s390/kernel/smp.c
+++ b/arch/s390/kernel/smp.c
@@ -798,7 +798,7 @@ static void smp_start_secondary(void *cpuvoid)
set_cpu_online(smp_processor_id(), true);
inc_irq_stat(CPU_RST);
local_irq_enable();
- cpu_startup_entry(CPUHP_ONLINE);
+ cpu_startup_entry(CPUHP_AP_ONLINE_IDLE);
}
/* Upping and downing of CPUs */
diff --git a/arch/s390/kvm/interrupt.c b/arch/s390/kvm/interrupt.c
index f88ca72c3a77a..9ffc732217921 100644
--- a/arch/s390/kvm/interrupt.c
+++ b/arch/s390/kvm/interrupt.c
@@ -966,13 +966,13 @@ int kvm_s390_handle_wait(struct kvm_vcpu *vcpu)
void kvm_s390_vcpu_wakeup(struct kvm_vcpu *vcpu)
{
- if (waitqueue_active(&vcpu->wq)) {
+ if (swait_active(&vcpu->wq)) {
/*
* The vcpu gave up the cpu voluntarily, mark it as a good
* yield-candidate.
*/
vcpu->preempted = true;
- wake_up_interruptible(&vcpu->wq);
+ swake_up(&vcpu->wq);
vcpu->stat.halt_wakeup++;
}
}
diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c
index 4af21c771f9b3..03dfe9c667f4e 100644
--- a/arch/s390/kvm/kvm-s390.c
+++ b/arch/s390/kvm/kvm-s390.c
@@ -2381,7 +2381,7 @@ int kvm_s390_store_status_unloaded(struct kvm_vcpu *vcpu, unsigned long gpa)
/* manually convert vector registers if necessary */
if (MACHINE_HAS_VX) {
- convert_vx_to_fp(fprs, current->thread.fpu.vxrs);
+ convert_vx_to_fp(fprs, (__vector128 *) vcpu->run->s.regs.vrs);
rc = write_guest_abs(vcpu, gpa + __LC_FPREGS_SAVE_AREA,
fprs, 128);
} else {
diff --git a/arch/score/kernel/setup.c b/arch/score/kernel/setup.c
index b48459afefddb..f3a0649ab5217 100644
--- a/arch/score/kernel/setup.c
+++ b/arch/score/kernel/setup.c
@@ -101,7 +101,7 @@ static void __init resource_init(void)
res->name = "System RAM";
res->start = MEMORY_START;
res->end = MEMORY_START + MEMORY_SIZE - 1;
- res->flags = IORESOURCE_MEM | IORESOURCE_BUSY;
+ res->flags = IORESOURCE_SYSTEM_RAM | IORESOURCE_BUSY;
request_resource(&iomem_resource, res);
request_resource(res, &code_resource);
diff --git a/arch/sh/kernel/setup.c b/arch/sh/kernel/setup.c
index de19cfa768f20..3f1c18b28e8af 100644
--- a/arch/sh/kernel/setup.c
+++ b/arch/sh/kernel/setup.c
@@ -78,17 +78,17 @@ static char __initdata command_line[COMMAND_LINE_SIZE] = { 0, };
static struct resource code_resource = {
.name = "Kernel code",
- .flags = IORESOURCE_BUSY | IORESOURCE_MEM,
+ .flags = IORESOURCE_BUSY | IORESOURCE_SYSTEM_RAM,
};
static struct resource data_resource = {
.name = "Kernel data",
- .flags = IORESOURCE_BUSY | IORESOURCE_MEM,
+ .flags = IORESOURCE_BUSY | IORESOURCE_SYSTEM_RAM,
};
static struct resource bss_resource = {
.name = "Kernel bss",
- .flags = IORESOURCE_BUSY | IORESOURCE_MEM,
+ .flags = IORESOURCE_BUSY | IORESOURCE_SYSTEM_RAM,
};
unsigned long memory_start;
@@ -202,7 +202,7 @@ void __init __add_active_range(unsigned int nid, unsigned long start_pfn,
res->name = "System RAM";
res->start = start;
res->end = end - 1;
- res->flags = IORESOURCE_MEM | IORESOURCE_BUSY;
+ res->flags = IORESOURCE_SYSTEM_RAM | IORESOURCE_BUSY;
if (request_resource(&iomem_resource, res)) {
pr_err("unable to request memory_resource 0x%lx 0x%lx\n",
diff --git a/arch/sh/kernel/smp.c b/arch/sh/kernel/smp.c
index de6be008fc014..13f633add29ac 100644
--- a/arch/sh/kernel/smp.c
+++ b/arch/sh/kernel/smp.c
@@ -203,7 +203,7 @@ asmlinkage void start_secondary(void)
set_cpu_online(cpu, true);
per_cpu(cpu_state, cpu) = CPU_ONLINE;
- cpu_startup_entry(CPUHP_ONLINE);
+ cpu_startup_entry(CPUHP_AP_ONLINE_IDLE);
}
extern struct {
diff --git a/arch/sparc/Makefile b/arch/sparc/Makefile
index eaee14637d93e..8496a074bd0eb 100644
--- a/arch/sparc/Makefile
+++ b/arch/sparc/Makefile
@@ -24,7 +24,13 @@ LDFLAGS := -m elf32_sparc
export BITS := 32
UTS_MACHINE := sparc
+# We are adding -Wa,-Av8 to KBUILD_CFLAGS to deal with a specs bug in some
+# versions of gcc. Some gcc versions won't pass -Av8 to binutils when you
+# give -mcpu=v8. This silently worked with older bintutils versions but
+# does not any more.
KBUILD_CFLAGS += -m32 -mcpu=v8 -pipe -mno-fpu -fcall-used-g5 -fcall-used-g7
+KBUILD_CFLAGS += -Wa,-Av8
+
KBUILD_AFLAGS += -m32 -Wa,-Av8
else
diff --git a/arch/sparc/include/uapi/asm/unistd.h b/arch/sparc/include/uapi/asm/unistd.h
index 1c26d440d288d..b6de8b10a55b8 100644
--- a/arch/sparc/include/uapi/asm/unistd.h
+++ b/arch/sparc/include/uapi/asm/unistd.h
@@ -422,8 +422,9 @@
#define __NR_listen 354
#define __NR_setsockopt 355
#define __NR_mlock2 356
+#define __NR_copy_file_range 357
-#define NR_syscalls 357
+#define NR_syscalls 358
/* Bitmask values returned from kern_features system call. */
#define KERN_FEATURE_MIXED_MODE_STACK 0x00000001
diff --git a/arch/sparc/kernel/entry.S b/arch/sparc/kernel/entry.S
index 33c02b15f4785..a83707c83be80 100644
--- a/arch/sparc/kernel/entry.S
+++ b/arch/sparc/kernel/entry.S
@@ -948,7 +948,24 @@ linux_syscall_trace:
cmp %o0, 0
bne 3f
mov -ENOSYS, %o0
+
+ /* Syscall tracing can modify the registers. */
+ ld [%sp + STACKFRAME_SZ + PT_G1], %g1
+ sethi %hi(sys_call_table), %l7
+ ld [%sp + STACKFRAME_SZ + PT_I0], %i0
+ or %l7, %lo(sys_call_table), %l7
+ ld [%sp + STACKFRAME_SZ + PT_I1], %i1
+ ld [%sp + STACKFRAME_SZ + PT_I2], %i2
+ ld [%sp + STACKFRAME_SZ + PT_I3], %i3
+ ld [%sp + STACKFRAME_SZ + PT_I4], %i4
+ ld [%sp + STACKFRAME_SZ + PT_I5], %i5
+ cmp %g1, NR_syscalls
+ bgeu 3f
+ mov -ENOSYS, %o0
+
+ sll %g1, 2, %l4
mov %i0, %o0
+ ld [%l7 + %l4], %l7
mov %i1, %o1
mov %i2, %o2
mov %i3, %o3
diff --git a/arch/sparc/kernel/head_64.S b/arch/sparc/kernel/head_64.S
index f2d30cab5b3f3..cd1f592cd3479 100644
--- a/arch/sparc/kernel/head_64.S
+++ b/arch/sparc/kernel/head_64.S
@@ -696,14 +696,6 @@ tlb_fixup_done:
call __bzero
sub %o1, %o0, %o1
-#ifdef CONFIG_LOCKDEP
- /* We have this call this super early, as even prom_init can grab
- * spinlocks and thus call into the lockdep code.
- */
- call lockdep_init
- nop
-#endif
-
call prom_init
mov %l7, %o0 ! OpenPROM cif handler
diff --git a/arch/sparc/kernel/hvcalls.S b/arch/sparc/kernel/hvcalls.S
index afbaba52d2f16..d127130bf4246 100644
--- a/arch/sparc/kernel/hvcalls.S
+++ b/arch/sparc/kernel/hvcalls.S
@@ -338,8 +338,9 @@ ENTRY(sun4v_mach_set_watchdog)
mov %o1, %o4
mov HV_FAST_MACH_SET_WATCHDOG, %o5
ta HV_FAST_TRAP
+ brnz,a,pn %o4, 0f
stx %o1, [%o4]
- retl
+0: retl
nop
ENDPROC(sun4v_mach_set_watchdog)
diff --git a/arch/sparc/kernel/signal_64.c b/arch/sparc/kernel/signal_64.c
index d88beff47bab3..39aaec173f66e 100644
--- a/arch/sparc/kernel/signal_64.c
+++ b/arch/sparc/kernel/signal_64.c
@@ -52,7 +52,7 @@ asmlinkage void sparc64_set_context(struct pt_regs *regs)
unsigned char fenab;
int err;
- flush_user_windows();
+ synchronize_user_stack();
if (get_thread_wsaved() ||
(((unsigned long)ucp) & (sizeof(unsigned long)-1)) ||
(!__access_ok(ucp, sizeof(*ucp))))
diff --git a/arch/sparc/kernel/smp_32.c b/arch/sparc/kernel/smp_32.c
index b3a5d81b20f0f..fb30e7c6a5b15 100644
--- a/arch/sparc/kernel/smp_32.c
+++ b/arch/sparc/kernel/smp_32.c
@@ -364,7 +364,7 @@ static void sparc_start_secondary(void *arg)
local_irq_enable();
wmb();
- cpu_startup_entry(CPUHP_ONLINE);
+ cpu_startup_entry(CPUHP_AP_ONLINE_IDLE);
/* We should never reach here! */
BUG();
diff --git a/arch/sparc/kernel/smp_64.c b/arch/sparc/kernel/smp_64.c
index 19cd08d186728..8a6151a628ce9 100644
--- a/arch/sparc/kernel/smp_64.c
+++ b/arch/sparc/kernel/smp_64.c
@@ -134,7 +134,7 @@ void smp_callin(void)
local_irq_enable();
- cpu_startup_entry(CPUHP_ONLINE);
+ cpu_startup_entry(CPUHP_AP_ONLINE_IDLE);
}
void cpu_panic(void)
diff --git a/arch/sparc/kernel/sparc_ksyms_64.c b/arch/sparc/kernel/sparc_ksyms_64.c
index a92d5d2c46a3a..9e034f29dcc52 100644
--- a/arch/sparc/kernel/sparc_ksyms_64.c
+++ b/arch/sparc/kernel/sparc_ksyms_64.c
@@ -37,6 +37,7 @@ EXPORT_SYMBOL(sun4v_niagara_getperf);
EXPORT_SYMBOL(sun4v_niagara_setperf);
EXPORT_SYMBOL(sun4v_niagara2_getperf);
EXPORT_SYMBOL(sun4v_niagara2_setperf);
+EXPORT_SYMBOL(sun4v_mach_set_watchdog);
/* from hweight.S */
EXPORT_SYMBOL(__arch_hweight8);
diff --git a/arch/sparc/kernel/syscalls.S b/arch/sparc/kernel/syscalls.S
index bb0008927598b..c4a1b5c40e4ef 100644
--- a/arch/sparc/kernel/syscalls.S
+++ b/arch/sparc/kernel/syscalls.S
@@ -158,7 +158,25 @@ linux_syscall_trace32:
add %sp, PTREGS_OFF, %o0
brnz,pn %o0, 3f
mov -ENOSYS, %o0
+
+ /* Syscall tracing can modify the registers. */
+ ldx [%sp + PTREGS_OFF + PT_V9_G1], %g1
+ sethi %hi(sys_call_table32), %l7
+ ldx [%sp + PTREGS_OFF + PT_V9_I0], %i0
+ or %l7, %lo(sys_call_table32), %l7
+ ldx [%sp + PTREGS_OFF + PT_V9_I1], %i1
+ ldx [%sp + PTREGS_OFF + PT_V9_I2], %i2
+ ldx [%sp + PTREGS_OFF + PT_V9_I3], %i3
+ ldx [%sp + PTREGS_OFF + PT_V9_I4], %i4
+ ldx [%sp + PTREGS_OFF + PT_V9_I5], %i5
+
+ cmp %g1, NR_syscalls
+ bgeu,pn %xcc, 3f
+ mov -ENOSYS, %o0
+
+ sll %g1, 2, %l4
srl %i0, 0, %o0
+ lduw [%l7 + %l4], %l7
srl %i4, 0, %o4
srl %i1, 0, %o1
srl %i2, 0, %o2
@@ -170,7 +188,25 @@ linux_syscall_trace:
add %sp, PTREGS_OFF, %o0
brnz,pn %o0, 3f
mov -ENOSYS, %o0
+
+ /* Syscall tracing can modify the registers. */
+ ldx [%sp + PTREGS_OFF + PT_V9_G1], %g1
+ sethi %hi(sys_call_table64), %l7
+ ldx [%sp + PTREGS_OFF + PT_V9_I0], %i0
+ or %l7, %lo(sys_call_table64), %l7
+ ldx [%sp + PTREGS_OFF + PT_V9_I1], %i1
+ ldx [%sp + PTREGS_OFF + PT_V9_I2], %i2
+ ldx [%sp + PTREGS_OFF + PT_V9_I3], %i3
+ ldx [%sp + PTREGS_OFF + PT_V9_I4], %i4
+ ldx [%sp + PTREGS_OFF + PT_V9_I5], %i5
+
+ cmp %g1, NR_syscalls
+ bgeu,pn %xcc, 3f
+ mov -ENOSYS, %o0
+
+ sll %g1, 2, %l4
mov %i0, %o0
+ lduw [%l7 + %l4], %l7
mov %i1, %o1
mov %i2, %o2
mov %i3, %o3
diff --git a/arch/sparc/kernel/systbls_32.S b/arch/sparc/kernel/systbls_32.S
index e663b6c78de2e..6c3dd6c52f8bd 100644
--- a/arch/sparc/kernel/systbls_32.S
+++ b/arch/sparc/kernel/systbls_32.S
@@ -88,4 +88,4 @@ sys_call_table:
/*340*/ .long sys_ni_syscall, sys_kcmp, sys_finit_module, sys_sched_setattr, sys_sched_getattr
/*345*/ .long sys_renameat2, sys_seccomp, sys_getrandom, sys_memfd_create, sys_bpf
/*350*/ .long sys_execveat, sys_membarrier, sys_userfaultfd, sys_bind, sys_listen
-/*355*/ .long sys_setsockopt, sys_mlock2
+/*355*/ .long sys_setsockopt, sys_mlock2, sys_copy_file_range
diff --git a/arch/sparc/kernel/systbls_64.S b/arch/sparc/kernel/systbls_64.S
index 1557121f4cdce..12b524cfcfa01 100644
--- a/arch/sparc/kernel/systbls_64.S
+++ b/arch/sparc/kernel/systbls_64.S
@@ -89,7 +89,7 @@ sys_call_table32:
/*340*/ .word sys_kern_features, sys_kcmp, sys_finit_module, sys_sched_setattr, sys_sched_getattr
.word sys32_renameat2, sys_seccomp, sys_getrandom, sys_memfd_create, sys_bpf
/*350*/ .word sys32_execveat, sys_membarrier, sys_userfaultfd, sys_bind, sys_listen
- .word compat_sys_setsockopt, sys_mlock2
+ .word compat_sys_setsockopt, sys_mlock2, sys_copy_file_range
#endif /* CONFIG_COMPAT */
@@ -170,4 +170,4 @@ sys_call_table:
/*340*/ .word sys_kern_features, sys_kcmp, sys_finit_module, sys_sched_setattr, sys_sched_getattr
.word sys_renameat2, sys_seccomp, sys_getrandom, sys_memfd_create, sys_bpf
/*350*/ .word sys64_execveat, sys_membarrier, sys_userfaultfd, sys_bind, sys_listen
- .word sys_setsockopt, sys_mlock2
+ .word sys_setsockopt, sys_mlock2, sys_copy_file_range
diff --git a/arch/sparc/mm/init_64.c b/arch/sparc/mm/init_64.c
index 6f216853f2724..1cfe6aab7a115 100644
--- a/arch/sparc/mm/init_64.c
+++ b/arch/sparc/mm/init_64.c
@@ -2863,17 +2863,17 @@ void hugetlb_setup(struct pt_regs *regs)
static struct resource code_resource = {
.name = "Kernel code",
- .flags = IORESOURCE_BUSY | IORESOURCE_MEM
+ .flags = IORESOURCE_BUSY | IORESOURCE_SYSTEM_RAM
};
static struct resource data_resource = {
.name = "Kernel data",
- .flags = IORESOURCE_BUSY | IORESOURCE_MEM
+ .flags = IORESOURCE_BUSY | IORESOURCE_SYSTEM_RAM
};
static struct resource bss_resource = {
.name = "Kernel bss",
- .flags = IORESOURCE_BUSY | IORESOURCE_MEM
+ .flags = IORESOURCE_BUSY | IORESOURCE_SYSTEM_RAM
};
static inline resource_size_t compute_kern_paddr(void *addr)
@@ -2909,7 +2909,7 @@ static int __init report_memory(void)
res->name = "System RAM";
res->start = pavail[i].phys_addr;
res->end = pavail[i].phys_addr + pavail[i].reg_size - 1;
- res->flags = IORESOURCE_BUSY | IORESOURCE_MEM;
+ res->flags = IORESOURCE_BUSY | IORESOURCE_SYSTEM_RAM;
if (insert_resource(&iomem_resource, res) < 0) {
pr_warn("Resource insertion failed.\n");
diff --git a/arch/tile/kernel/setup.c b/arch/tile/kernel/setup.c
index bbb855de65698..a992238e9b582 100644
--- a/arch/tile/kernel/setup.c
+++ b/arch/tile/kernel/setup.c
@@ -1632,14 +1632,14 @@ static struct resource data_resource = {
.name = "Kernel data",
.start = 0,
.end = 0,
- .flags = IORESOURCE_BUSY | IORESOURCE_MEM
+ .flags = IORESOURCE_BUSY | IORESOURCE_SYSTEM_RAM
};
static struct resource code_resource = {
.name = "Kernel code",
.start = 0,
.end = 0,
- .flags = IORESOURCE_BUSY | IORESOURCE_MEM
+ .flags = IORESOURCE_BUSY | IORESOURCE_SYSTEM_RAM
};
/*
@@ -1673,10 +1673,15 @@ insert_ram_resource(u64 start_pfn, u64 end_pfn, bool reserved)
kzalloc(sizeof(struct resource), GFP_ATOMIC);
if (!res)
return NULL;
- res->name = reserved ? "Reserved" : "System RAM";
res->start = start_pfn << PAGE_SHIFT;
res->end = (end_pfn << PAGE_SHIFT) - 1;
res->flags = IORESOURCE_BUSY | IORESOURCE_MEM;
+ if (reserved) {
+ res->name = "Reserved";
+ } else {
+ res->name = "System RAM";
+ res->flags |= IORESOURCE_SYSRAM;
+ }
if (insert_resource(&iomem_resource, res)) {
kfree(res);
return NULL;
diff --git a/arch/tile/kernel/smpboot.c b/arch/tile/kernel/smpboot.c
index 20d52a98e1716..6c0abaacec335 100644
--- a/arch/tile/kernel/smpboot.c
+++ b/arch/tile/kernel/smpboot.c
@@ -208,7 +208,7 @@ void online_secondary(void)
/* Set up tile-timer clock-event device on this cpu */
setup_tile_timer();
- cpu_startup_entry(CPUHP_ONLINE);
+ cpu_startup_entry(CPUHP_AP_ONLINE_IDLE);
}
int __cpu_up(unsigned int cpu, struct task_struct *tidle)
diff --git a/arch/um/kernel/reboot.c b/arch/um/kernel/reboot.c
index 9bdf67a092a53..b60a9f8cda755 100644
--- a/arch/um/kernel/reboot.c
+++ b/arch/um/kernel/reboot.c
@@ -12,6 +12,7 @@
#include
void (*pm_power_off)(void);
+EXPORT_SYMBOL(pm_power_off);
static void kill_off_processes(void)
{
diff --git a/arch/um/kernel/signal.c b/arch/um/kernel/signal.c
index fc8be0e3a4ff8..57acbd67d85db 100644
--- a/arch/um/kernel/signal.c
+++ b/arch/um/kernel/signal.c
@@ -69,7 +69,7 @@ void do_signal(struct pt_regs *regs)
struct ksignal ksig;
int handled_sig = 0;
- if (get_signal(&ksig)) {
+ while (get_signal(&ksig)) {
handled_sig = 1;
/* Whee! Actually deliver the signal. */
handle_signal(&ksig, regs);
diff --git a/arch/unicore32/kernel/setup.c b/arch/unicore32/kernel/setup.c
index 3fa317f961221..c2bffa5614a48 100644
--- a/arch/unicore32/kernel/setup.c
+++ b/arch/unicore32/kernel/setup.c
@@ -72,13 +72,13 @@ static struct resource mem_res[] = {
.name = "Kernel code",
.start = 0,
.end = 0,
- .flags = IORESOURCE_MEM
+ .flags = IORESOURCE_SYSTEM_RAM
},
{
.name = "Kernel data",
.start = 0,
.end = 0,
- .flags = IORESOURCE_MEM
+ .flags = IORESOURCE_SYSTEM_RAM
}
};
@@ -211,7 +211,7 @@ request_standard_resources(struct meminfo *mi)
res->name = "System RAM";
res->start = mi->bank[i].start;
res->end = mi->bank[i].start + mi->bank[i].size - 1;
- res->flags = IORESOURCE_MEM | IORESOURCE_BUSY;
+ res->flags = IORESOURCE_SYSTEM_RAM | IORESOURCE_BUSY;
request_resource(&iomem_resource, res);
diff --git a/arch/x86/Kbuild b/arch/x86/Kbuild
index 1538562cc720e..eb3abf8ac44eb 100644
--- a/arch/x86/Kbuild
+++ b/arch/x86/Kbuild
@@ -1,6 +1,7 @@
-
obj-y += entry/
+obj-$(CONFIG_PERF_EVENTS) += events/
+
obj-$(CONFIG_KVM) += kvm/
# Xen paravirtualization support
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index c46662f64c392..8f2e6659281b7 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -303,6 +303,9 @@ config ARCH_SUPPORTS_UPROBES
config FIX_EARLYCON_MEM
def_bool y
+config DEBUG_RODATA
+ def_bool y
+
config PGTABLE_LEVELS
int
default 4 if X86_64
@@ -1160,22 +1163,23 @@ config MICROCODE
bool "CPU microcode loading support"
default y
depends on CPU_SUP_AMD || CPU_SUP_INTEL
- depends on BLK_DEV_INITRD
select FW_LOADER
---help---
-
If you say Y here, you will be able to update the microcode on
- certain Intel and AMD processors. The Intel support is for the
- IA32 family, e.g. Pentium Pro, Pentium II, Pentium III, Pentium 4,
- Xeon etc. The AMD support is for families 0x10 and later. You will
- obviously need the actual microcode binary data itself which is not
- shipped with the Linux kernel.
-
- This option selects the general module only, you need to select
- at least one vendor specific module as well.
-
- To compile this driver as a module, choose M here: the module
- will be called microcode.
+ Intel and AMD processors. The Intel support is for the IA32 family,
+ e.g. Pentium Pro, Pentium II, Pentium III, Pentium 4, Xeon etc. The
+ AMD support is for families 0x10 and later. You will obviously need
+ the actual microcode binary data itself which is not shipped with
+ the Linux kernel.
+
+ The preferred method to load microcode from a detached initrd is described
+ in Documentation/x86/early-microcode.txt. For that you need to enable
+ CONFIG_BLK_DEV_INITRD in order for the loader to be able to scan the
+ initrd for microcode blobs.
+
+ In addition, you can build-in the microcode into the kernel. For that you
+ need to enable FIRMWARE_IN_KERNEL and add the vendor-supplied microcode
+ to the CONFIG_EXTRA_FIRMWARE config option.
config MICROCODE_INTEL
bool "Intel microcode loading support"
diff --git a/arch/x86/Kconfig.debug b/arch/x86/Kconfig.debug
index 9b18ed97a8a29..67eec55093a5d 100644
--- a/arch/x86/Kconfig.debug
+++ b/arch/x86/Kconfig.debug
@@ -74,28 +74,16 @@ config EFI_PGT_DUMP
issues with the mapping of the EFI runtime regions into that
table.
-config DEBUG_RODATA
- bool "Write protect kernel read-only data structures"
- default y
- depends on DEBUG_KERNEL
- ---help---
- Mark the kernel read-only data as write-protected in the pagetables,
- in order to catch accidental (and incorrect) writes to such const
- data. This is recommended so that we can catch kernel bugs sooner.
- If in doubt, say "Y".
-
config DEBUG_RODATA_TEST
- bool "Testcase for the DEBUG_RODATA feature"
- depends on DEBUG_RODATA
+ bool "Testcase for the marking rodata read-only"
default y
---help---
- This option enables a testcase for the DEBUG_RODATA
- feature as well as for the change_page_attr() infrastructure.
+ This option enables a testcase for the setting rodata read-only
+ as well as for the change_page_attr() infrastructure.
If in doubt, say "N"
config DEBUG_WX
bool "Warn on W+X mappings at boot"
- depends on DEBUG_RODATA
select X86_PTDUMP_CORE
---help---
Generate a warning if any W+X mappings are found at boot.
@@ -350,16 +338,6 @@ config DEBUG_IMR_SELFTEST
If unsure say N here.
-config X86_DEBUG_STATIC_CPU_HAS
- bool "Debug alternatives"
- depends on DEBUG_KERNEL
- ---help---
- This option causes additional code to be generated which
- fails if static_cpu_has() is used before alternatives have
- run.
-
- If unsure, say N.
-
config X86_DEBUG_FPU
bool "Debug the x86 FPU code"
depends on DEBUG_KERNEL
diff --git a/arch/x86/boot/cpuflags.h b/arch/x86/boot/cpuflags.h
index ea97697e51e40..4cb404fd45cea 100644
--- a/arch/x86/boot/cpuflags.h
+++ b/arch/x86/boot/cpuflags.h
@@ -1,7 +1,7 @@
#ifndef BOOT_CPUFLAGS_H
#define BOOT_CPUFLAGS_H
-#include
+#include
#include
struct cpu_features {
diff --git a/arch/x86/boot/mkcpustr.c b/arch/x86/boot/mkcpustr.c
index 637097e66a62a..f72498dc90d23 100644
--- a/arch/x86/boot/mkcpustr.c
+++ b/arch/x86/boot/mkcpustr.c
@@ -17,7 +17,7 @@
#include "../include/asm/required-features.h"
#include "../include/asm/disabled-features.h"
-#include "../include/asm/cpufeature.h"
+#include "../include/asm/cpufeatures.h"
#include "../kernel/cpu/capflags.c"
int main(void)
diff --git a/arch/x86/boot/tools/build.c b/arch/x86/boot/tools/build.c
index a7661c430cd98..0702d2531bc7f 100644
--- a/arch/x86/boot/tools/build.c
+++ b/arch/x86/boot/tools/build.c
@@ -49,7 +49,6 @@ typedef unsigned int u32;
/* This must be large enough to hold the entire setup */
u8 buf[SETUP_SECT_MAX*512];
-int is_big_kernel;
#define PECOFF_RELOC_RESERVE 0x20
diff --git a/arch/x86/configs/i386_defconfig b/arch/x86/configs/i386_defconfig
index 028be48c88397..e25a1630320cc 100644
--- a/arch/x86/configs/i386_defconfig
+++ b/arch/x86/configs/i386_defconfig
@@ -288,7 +288,7 @@ CONFIG_NLS_ISO8859_1=y
CONFIG_NLS_UTF8=y
CONFIG_PRINTK_TIME=y
# CONFIG_ENABLE_WARN_DEPRECATED is not set
-CONFIG_FRAME_WARN=2048
+CONFIG_FRAME_WARN=1024
CONFIG_MAGIC_SYSRQ=y
# CONFIG_UNUSED_SYMBOLS is not set
CONFIG_DEBUG_KERNEL=y
diff --git a/arch/x86/crypto/crc32-pclmul_glue.c b/arch/x86/crypto/crc32-pclmul_glue.c
index 07d2c6c86a548..27226df3f7d8a 100644
--- a/arch/x86/crypto/crc32-pclmul_glue.c
+++ b/arch/x86/crypto/crc32-pclmul_glue.c
@@ -33,7 +33,7 @@
#include
#include
-#include
+#include
#include
#include
diff --git a/arch/x86/crypto/crc32c-intel_glue.c b/arch/x86/crypto/crc32c-intel_glue.c
index 0e9871693f246..0857b1a1de3bd 100644
--- a/arch/x86/crypto/crc32c-intel_glue.c
+++ b/arch/x86/crypto/crc32c-intel_glue.c
@@ -30,7 +30,7 @@
#include
#include
-#include
+#include
#include
#include
diff --git a/arch/x86/crypto/crct10dif-pclmul_glue.c b/arch/x86/crypto/crct10dif-pclmul_glue.c
index a3fcfc97a311d..cd4df93225014 100644
--- a/arch/x86/crypto/crct10dif-pclmul_glue.c
+++ b/arch/x86/crypto/crct10dif-pclmul_glue.c
@@ -30,7 +30,7 @@
#include
#include
#include
-#include
+#include
#include
asmlinkage __u16 crc_t10dif_pcl(__u16 crc, const unsigned char *buf,
diff --git a/arch/x86/entry/calling.h b/arch/x86/entry/calling.h
index e32206e098682..9a9e5884066c6 100644
--- a/arch/x86/entry/calling.h
+++ b/arch/x86/entry/calling.h
@@ -201,37 +201,6 @@ For 32-bit we have the following conventions - kernel is built with
.byte 0xf1
.endm
-#else /* CONFIG_X86_64 */
-
-/*
- * For 32bit only simplified versions of SAVE_ALL/RESTORE_ALL. These
- * are different from the entry_32.S versions in not changing the segment
- * registers. So only suitable for in kernel use, not when transitioning
- * from or to user space. The resulting stack frame is not a standard
- * pt_regs frame. The main use case is calling C code from assembler
- * when all the registers need to be preserved.
- */
-
- .macro SAVE_ALL
- pushl %eax
- pushl %ebp
- pushl %edi
- pushl %esi
- pushl %edx
- pushl %ecx
- pushl %ebx
- .endm
-
- .macro RESTORE_ALL
- popl %ebx
- popl %ecx
- popl %edx
- popl %esi
- popl %edi
- popl %ebp
- popl %eax
- .endm
-
#endif /* CONFIG_X86_64 */
/*
diff --git a/arch/x86/entry/common.c b/arch/x86/entry/common.c
index 03663740c8665..e79d93d44ecd9 100644
--- a/arch/x86/entry/common.c
+++ b/arch/x86/entry/common.c
@@ -26,6 +26,7 @@
#include
#include
#include
+#include
#define CREATE_TRACE_POINTS
#include
@@ -44,6 +45,8 @@ __visible void enter_from_user_mode(void)
CT_WARN_ON(ct_state() != CONTEXT_USER);
user_exit();
}
+#else
+static inline void enter_from_user_mode(void) {}
#endif
static void do_audit_syscall_entry(struct pt_regs *regs, u32 arch)
@@ -84,17 +87,6 @@ unsigned long syscall_trace_enter_phase1(struct pt_regs *regs, u32 arch)
work = ACCESS_ONCE(ti->flags) & _TIF_WORK_SYSCALL_ENTRY;
-#ifdef CONFIG_CONTEXT_TRACKING
- /*
- * If TIF_NOHZ is set, we are required to call user_exit() before
- * doing anything that could touch RCU.
- */
- if (work & _TIF_NOHZ) {
- enter_from_user_mode();
- work &= ~_TIF_NOHZ;
- }
-#endif
-
#ifdef CONFIG_SECCOMP
/*
* Do seccomp first -- it should minimize exposure of other
@@ -171,16 +163,6 @@ long syscall_trace_enter_phase2(struct pt_regs *regs, u32 arch,
if (IS_ENABLED(CONFIG_DEBUG_ENTRY))
BUG_ON(regs != task_pt_regs(current));
- /*
- * If we stepped into a sysenter/syscall insn, it trapped in
- * kernel mode; do_debug() cleared TF and set TIF_SINGLESTEP.
- * If user-mode had set TF itself, then it's still clear from
- * do_debug() and we need to set it again to restore the user
- * state. If we entered on the slow path, TF was already set.
- */
- if (work & _TIF_SINGLESTEP)
- regs->flags |= X86_EFLAGS_TF;
-
#ifdef CONFIG_SECCOMP
/*
* Call seccomp_phase2 before running the other hooks so that
@@ -268,6 +250,7 @@ static void exit_to_usermode_loop(struct pt_regs *regs, u32 cached_flags)
/* Called with IRQs disabled. */
__visible inline void prepare_exit_to_usermode(struct pt_regs *regs)
{
+ struct thread_info *ti = pt_regs_to_thread_info(regs);
u32 cached_flags;
if (IS_ENABLED(CONFIG_PROVE_LOCKING) && WARN_ON(!irqs_disabled()))
@@ -275,12 +258,22 @@ __visible inline void prepare_exit_to_usermode(struct pt_regs *regs)
lockdep_sys_exit();
- cached_flags =
- READ_ONCE(pt_regs_to_thread_info(regs)->flags);
+ cached_flags = READ_ONCE(ti->flags);
if (unlikely(cached_flags & EXIT_TO_USERMODE_LOOP_FLAGS))
exit_to_usermode_loop(regs, cached_flags);
+#ifdef CONFIG_COMPAT
+ /*
+ * Compat syscalls set TS_COMPAT. Make sure we clear it before
+ * returning to user mode. We need to clear it *after* signal
+ * handling, because syscall restart has a fixup for compat
+ * syscalls. The fixup is exercised by the ptrace_syscall_32
+ * selftest.
+ */
+ ti->status &= ~TS_COMPAT;
+#endif
+
user_enter();
}
@@ -332,33 +325,45 @@ __visible inline void syscall_return_slowpath(struct pt_regs *regs)
if (unlikely(cached_flags & SYSCALL_EXIT_WORK_FLAGS))
syscall_slow_exit_work(regs, cached_flags);
-#ifdef CONFIG_COMPAT
+ local_irq_disable();
+ prepare_exit_to_usermode(regs);
+}
+
+#ifdef CONFIG_X86_64
+__visible void do_syscall_64(struct pt_regs *regs)
+{
+ struct thread_info *ti = pt_regs_to_thread_info(regs);
+ unsigned long nr = regs->orig_ax;
+
+ enter_from_user_mode();
+ local_irq_enable();
+
+ if (READ_ONCE(ti->flags) & _TIF_WORK_SYSCALL_ENTRY)
+ nr = syscall_trace_enter(regs);
+
/*
- * Compat syscalls set TS_COMPAT. Make sure we clear it before
- * returning to user mode.
+ * NB: Native and x32 syscalls are dispatched from the same
+ * table. The only functional difference is the x32 bit in
+ * regs->orig_ax, which changes the behavior of some syscalls.
*/
- ti->status &= ~TS_COMPAT;
-#endif
+ if (likely((nr & __SYSCALL_MASK) < NR_syscalls)) {
+ regs->ax = sys_call_table[nr & __SYSCALL_MASK](
+ regs->di, regs->si, regs->dx,
+ regs->r10, regs->r8, regs->r9);
+ }
- local_irq_disable();
- prepare_exit_to_usermode(regs);
+ syscall_return_slowpath(regs);
}
+#endif
#if defined(CONFIG_X86_32) || defined(CONFIG_IA32_EMULATION)
/*
- * Does a 32-bit syscall. Called with IRQs on and does all entry and
- * exit work and returns with IRQs off. This function is extremely hot
- * in workloads that use it, and it's usually called from
+ * Does a 32-bit syscall. Called with IRQs on in CONTEXT_KERNEL. Does
+ * all entry and exit work and returns with IRQs off. This function is
+ * extremely hot in workloads that use it, and it's usually called from
* do_fast_syscall_32, so forcibly inline it to improve performance.
*/
-#ifdef CONFIG_X86_32
-/* 32-bit kernels use a trap gate for INT80, and the asm code calls here. */
-__visible
-#else
-/* 64-bit kernels use do_syscall_32_irqs_off() instead. */
-static
-#endif
-__always_inline void do_syscall_32_irqs_on(struct pt_regs *regs)
+static __always_inline void do_syscall_32_irqs_on(struct pt_regs *regs)
{
struct thread_info *ti = pt_regs_to_thread_info(regs);
unsigned int nr = (unsigned int)regs->orig_ax;
@@ -393,14 +398,13 @@ __always_inline void do_syscall_32_irqs_on(struct pt_regs *regs)
syscall_return_slowpath(regs);
}
-#ifdef CONFIG_X86_64
-/* Handles INT80 on 64-bit kernels */
-__visible void do_syscall_32_irqs_off(struct pt_regs *regs)
+/* Handles int $0x80 */
+__visible void do_int80_syscall_32(struct pt_regs *regs)
{
+ enter_from_user_mode();
local_irq_enable();
do_syscall_32_irqs_on(regs);
}
-#endif
/* Returns 0 to return using IRET or 1 to return using SYSEXIT/SYSRETL. */
__visible long do_fast_syscall_32(struct pt_regs *regs)
@@ -420,12 +424,11 @@ __visible long do_fast_syscall_32(struct pt_regs *regs)
*/
regs->ip = landing_pad;
- /*
- * Fetch EBP from where the vDSO stashed it.
- *
- * WARNING: We are in CONTEXT_USER and RCU isn't paying attention!
- */
+ enter_from_user_mode();
+
local_irq_enable();
+
+ /* Fetch EBP from where the vDSO stashed it. */
if (
#ifdef CONFIG_X86_64
/*
@@ -443,9 +446,6 @@ __visible long do_fast_syscall_32(struct pt_regs *regs)
/* User code screwed up. */
local_irq_disable();
regs->ax = -EFAULT;
-#ifdef CONFIG_CONTEXT_TRACKING
- enter_from_user_mode();
-#endif
prepare_exit_to_usermode(regs);
return 0; /* Keep it simple: use IRET. */
}
diff --git a/arch/x86/entry/entry_32.S b/arch/x86/entry/entry_32.S
index bb3e376d0f33c..10868aa734dc0 100644
--- a/arch/x86/entry/entry_32.S
+++ b/arch/x86/entry/entry_32.S
@@ -40,7 +40,7 @@
#include
#include
#include
-#include
+#include
#include
#include
#include
@@ -287,20 +287,93 @@ need_resched:
END(resume_kernel)
#endif
- # SYSENTER call handler stub
+GLOBAL(__begin_SYSENTER_singlestep_region)
+/*
+ * All code from here through __end_SYSENTER_singlestep_region is subject
+ * to being single-stepped if a user program sets TF and executes SYSENTER.
+ * There is absolutely nothing that we can do to prevent this from happening
+ * (thanks Intel!). To keep our handling of this situation as simple as
+ * possible, we handle TF just like AC and NT, except that our #DB handler
+ * will ignore all of the single-step traps generated in this range.
+ */
+
+#ifdef CONFIG_XEN
+/*
+ * Xen doesn't set %esp to be precisely what the normal SYSENTER
+ * entry point expects, so fix it up before using the normal path.
+ */
+ENTRY(xen_sysenter_target)
+ addl $5*4, %esp /* remove xen-provided frame */
+ jmp sysenter_past_esp
+#endif
+
+/*
+ * 32-bit SYSENTER entry.
+ *
+ * 32-bit system calls through the vDSO's __kernel_vsyscall enter here
+ * if X86_FEATURE_SEP is available. This is the preferred system call
+ * entry on 32-bit systems.
+ *
+ * The SYSENTER instruction, in principle, should *only* occur in the
+ * vDSO. In practice, a small number of Android devices were shipped
+ * with a copy of Bionic that inlined a SYSENTER instruction. This
+ * never happened in any of Google's Bionic versions -- it only happened
+ * in a narrow range of Intel-provided versions.
+ *
+ * SYSENTER loads SS, ESP, CS, and EIP from previously programmed MSRs.
+ * IF and VM in RFLAGS are cleared (IOW: interrupts are off).
+ * SYSENTER does not save anything on the stack,
+ * and does not save old EIP (!!!), ESP, or EFLAGS.
+ *
+ * To avoid losing track of EFLAGS.VM (and thus potentially corrupting
+ * user and/or vm86 state), we explicitly disable the SYSENTER
+ * instruction in vm86 mode by reprogramming the MSRs.
+ *
+ * Arguments:
+ * eax system call number
+ * ebx arg1
+ * ecx arg2
+ * edx arg3
+ * esi arg4
+ * edi arg5
+ * ebp user stack
+ * 0(%ebp) arg6
+ */
ENTRY(entry_SYSENTER_32)
movl TSS_sysenter_sp0(%esp), %esp
sysenter_past_esp:
pushl $__USER_DS /* pt_regs->ss */
pushl %ebp /* pt_regs->sp (stashed in bp) */
pushfl /* pt_regs->flags (except IF = 0) */
- ASM_CLAC /* Clear AC after saving FLAGS */
orl $X86_EFLAGS_IF, (%esp) /* Fix IF */
pushl $__USER_CS /* pt_regs->cs */
pushl $0 /* pt_regs->ip = 0 (placeholder) */
pushl %eax /* pt_regs->orig_ax */
SAVE_ALL pt_regs_ax=$-ENOSYS /* save rest */
+ /*
+ * SYSENTER doesn't filter flags, so we need to clear NT, AC
+ * and TF ourselves. To save a few cycles, we can check whether
+ * either was set instead of doing an unconditional popfq.
+ * This needs to happen before enabling interrupts so that
+ * we don't get preempted with NT set.
+ *
+ * If TF is set, we will single-step all the way to here -- do_debug
+ * will ignore all the traps. (Yes, this is slow, but so is
+ * single-stepping in general. This allows us to avoid having
+ * a more complicated code to handle the case where a user program
+ * forces us to single-step through the SYSENTER entry code.)
+ *
+ * NB.: .Lsysenter_fix_flags is a label with the code under it moved
+ * out-of-line as an optimization: NT is unlikely to be set in the
+ * majority of the cases and instead of polluting the I$ unnecessarily,
+ * we're keeping that code behind a branch which will predict as
+ * not-taken and therefore its instructions won't be fetched.
+ */
+ testl $X86_EFLAGS_NT|X86_EFLAGS_AC|X86_EFLAGS_TF, PT_EFLAGS(%esp)
+ jnz .Lsysenter_fix_flags
+.Lsysenter_flags_fixed:
+
/*
* User mode is traced as though IRQs are on, and SYSENTER
* turned them off.
@@ -326,6 +399,15 @@ sysenter_past_esp:
popl %ebp /* pt_regs->bp */
popl %eax /* pt_regs->ax */
+ /*
+ * Restore all flags except IF. (We restore IF separately because
+ * STI gives a one-instruction window in which we won't be interrupted,
+ * whereas POPF does not.)
+ */
+ addl $PT_EFLAGS-PT_DS, %esp /* point esp at pt_regs->flags */
+ btr $X86_EFLAGS_IF_BIT, (%esp)
+ popfl
+
/*
* Return back to the vDSO, which will pop ecx and edx.
* Don't bother with DS and ES (they already contain __USER_DS).
@@ -339,28 +421,63 @@ sysenter_past_esp:
.popsection
_ASM_EXTABLE(1b, 2b)
PTGS_TO_GS_EX
+
+.Lsysenter_fix_flags:
+ pushl $X86_EFLAGS_FIXED
+ popfl
+ jmp .Lsysenter_flags_fixed
+GLOBAL(__end_SYSENTER_singlestep_region)
ENDPROC(entry_SYSENTER_32)
- # system call handler stub
+/*
+ * 32-bit legacy system call entry.
+ *
+ * 32-bit x86 Linux system calls traditionally used the INT $0x80
+ * instruction. INT $0x80 lands here.
+ *
+ * This entry point can be used by any 32-bit perform system calls.
+ * Instances of INT $0x80 can be found inline in various programs and
+ * libraries. It is also used by the vDSO's __kernel_vsyscall
+ * fallback for hardware that doesn't support a faster entry method.
+ * Restarted 32-bit system calls also fall back to INT $0x80
+ * regardless of what instruction was originally used to do the system
+ * call. (64-bit programs can use INT $0x80 as well, but they can
+ * only run on 64-bit kernels and therefore land in
+ * entry_INT80_compat.)
+ *
+ * This is considered a slow path. It is not used by most libc
+ * implementations on modern hardware except during process startup.
+ *
+ * Arguments:
+ * eax system call number
+ * ebx arg1
+ * ecx arg2
+ * edx arg3
+ * esi arg4
+ * edi arg5
+ * ebp arg6
+ */
ENTRY(entry_INT80_32)
ASM_CLAC
pushl %eax /* pt_regs->orig_ax */
SAVE_ALL pt_regs_ax=$-ENOSYS /* save rest */
/*
- * User mode is traced as though IRQs are on. Unlike the 64-bit
- * case, INT80 is a trap gate on 32-bit kernels, so interrupts
- * are already on (unless user code is messing around with iopl).
+ * User mode is traced as though IRQs are on, and the interrupt gate
+ * turned them off.
*/
+ TRACE_IRQS_OFF
movl %esp, %eax
- call do_syscall_32_irqs_on
+ call do_int80_syscall_32
.Lsyscall_32_done:
restore_all:
TRACE_IRQS_IRET
restore_all_notrace:
#ifdef CONFIG_X86_ESPFIX32
+ ALTERNATIVE "jmp restore_nocheck", "", X86_BUG_ESPFIX
+
movl PT_EFLAGS(%esp), %eax # mix EFLAGS, SS and CS
/*
* Warning: PT_OLDSS(%esp) contains the wrong/random values if we
@@ -387,19 +504,6 @@ ENTRY(iret_exc )
#ifdef CONFIG_X86_ESPFIX32
ldt_ss:
-#ifdef CONFIG_PARAVIRT
- /*
- * The kernel can't run on a non-flat stack if paravirt mode
- * is active. Rather than try to fixup the high bits of
- * ESP, bypass this code entirely. This may break DOSemu
- * and/or Wine support in a paravirt VM, although the option
- * is still available to implement the setting of the high
- * 16-bits in the INTERRUPT_RETURN paravirt-op.
- */
- cmpl $0, pv_info+PARAVIRT_enabled
- jne restore_nocheck
-#endif
-
/*
* Setup and switch to ESPFIX stack
*
@@ -632,14 +736,6 @@ ENTRY(spurious_interrupt_bug)
END(spurious_interrupt_bug)
#ifdef CONFIG_XEN
-/*
- * Xen doesn't set %esp to be precisely what the normal SYSENTER
- * entry point expects, so fix it up before using the normal path.
- */
-ENTRY(xen_sysenter_target)
- addl $5*4, %esp /* remove xen-provided frame */
- jmp sysenter_past_esp
-
ENTRY(xen_hypervisor_callback)
pushl $-1 /* orig_ax = -1 => not a system call */
SAVE_ALL
@@ -939,51 +1035,48 @@ error_code:
jmp ret_from_exception
END(page_fault)
-/*
- * Debug traps and NMI can happen at the one SYSENTER instruction
- * that sets up the real kernel stack. Check here, since we can't
- * allow the wrong stack to be used.
- *
- * "TSS_sysenter_sp0+12" is because the NMI/debug handler will have
- * already pushed 3 words if it hits on the sysenter instruction:
- * eflags, cs and eip.
- *
- * We just load the right stack, and push the three (known) values
- * by hand onto the new stack - while updating the return eip past
- * the instruction that would have done it for sysenter.
- */
-.macro FIX_STACK offset ok label
- cmpw $__KERNEL_CS, 4(%esp)
- jne \ok
-\label:
- movl TSS_sysenter_sp0 + \offset(%esp), %esp
- pushfl
- pushl $__KERNEL_CS
- pushl $sysenter_past_esp
-.endm
-
ENTRY(debug)
+ /*
+ * #DB can happen at the first instruction of
+ * entry_SYSENTER_32 or in Xen's SYSENTER prologue. If this
+ * happens, then we will be running on a very small stack. We
+ * need to detect this condition and switch to the thread
+ * stack before calling any C code at all.
+ *
+ * If you edit this code, keep in mind that NMIs can happen in here.
+ */
ASM_CLAC
- cmpl $entry_SYSENTER_32, (%esp)
- jne debug_stack_correct
- FIX_STACK 12, debug_stack_correct, debug_esp_fix_insn
-debug_stack_correct:
pushl $-1 # mark this as an int
SAVE_ALL
- TRACE_IRQS_OFF
xorl %edx, %edx # error code 0
movl %esp, %eax # pt_regs pointer
+
+ /* Are we currently on the SYSENTER stack? */
+ PER_CPU(cpu_tss + CPU_TSS_SYSENTER_stack + SIZEOF_SYSENTER_stack, %ecx)
+ subl %eax, %ecx /* ecx = (end of SYSENTER_stack) - esp */
+ cmpl $SIZEOF_SYSENTER_stack, %ecx
+ jb .Ldebug_from_sysenter_stack
+
+ TRACE_IRQS_OFF
+ call do_debug
+ jmp ret_from_exception
+
+.Ldebug_from_sysenter_stack:
+ /* We're on the SYSENTER stack. Switch off. */
+ movl %esp, %ebp
+ movl PER_CPU_VAR(cpu_current_top_of_stack), %esp
+ TRACE_IRQS_OFF
call do_debug
+ movl %ebp, %esp
jmp ret_from_exception
END(debug)
/*
- * NMI is doubly nasty. It can happen _while_ we're handling
- * a debug fault, and the debug fault hasn't yet been able to
- * clear up the stack. So we first check whether we got an
- * NMI on the sysenter entry path, but after that we need to
- * check whether we got an NMI on the debug path where the debug
- * fault happened on the sysenter path.
+ * NMI is doubly nasty. It can happen on the first instruction of
+ * entry_SYSENTER_32 (just like #DB), but it can also interrupt the beginning
+ * of the #DB handler even if that #DB in turn hit before entry_SYSENTER_32
+ * switched stacks. We handle both conditions by simply checking whether we
+ * interrupted kernel code running on the SYSENTER stack.
*/
ENTRY(nmi)
ASM_CLAC
@@ -994,41 +1087,32 @@ ENTRY(nmi)
popl %eax
je nmi_espfix_stack
#endif
- cmpl $entry_SYSENTER_32, (%esp)
- je nmi_stack_fixup
- pushl %eax
- movl %esp, %eax
- /*
- * Do not access memory above the end of our stack page,
- * it might not exist.
- */
- andl $(THREAD_SIZE-1), %eax
- cmpl $(THREAD_SIZE-20), %eax
- popl %eax
- jae nmi_stack_correct
- cmpl $entry_SYSENTER_32, 12(%esp)
- je nmi_debug_stack_check
-nmi_stack_correct:
- pushl %eax
+
+ pushl %eax # pt_regs->orig_ax
SAVE_ALL
xorl %edx, %edx # zero error code
movl %esp, %eax # pt_regs pointer
+
+ /* Are we currently on the SYSENTER stack? */
+ PER_CPU(cpu_tss + CPU_TSS_SYSENTER_stack + SIZEOF_SYSENTER_stack, %ecx)
+ subl %eax, %ecx /* ecx = (end of SYSENTER_stack) - esp */
+ cmpl $SIZEOF_SYSENTER_stack, %ecx
+ jb .Lnmi_from_sysenter_stack
+
+ /* Not on SYSENTER stack. */
call do_nmi
jmp restore_all_notrace
-nmi_stack_fixup:
- FIX_STACK 12, nmi_stack_correct, 1
- jmp nmi_stack_correct
-
-nmi_debug_stack_check:
- cmpw $__KERNEL_CS, 16(%esp)
- jne nmi_stack_correct
- cmpl $debug, (%esp)
- jb nmi_stack_correct
- cmpl $debug_esp_fix_insn, (%esp)
- ja nmi_stack_correct
- FIX_STACK 24, nmi_stack_correct, 1
- jmp nmi_stack_correct
+.Lnmi_from_sysenter_stack:
+ /*
+ * We're on the SYSENTER stack. Switch off. No one (not even debug)
+ * is using the thread stack right now, so it's safe for us to use it.
+ */
+ movl %esp, %ebp
+ movl PER_CPU_VAR(cpu_current_top_of_stack), %esp
+ call do_nmi
+ movl %ebp, %esp
+ jmp restore_all_notrace
#ifdef CONFIG_X86_ESPFIX32
nmi_espfix_stack:
diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
index 9d34d3cfceb61..858b555e274b8 100644
--- a/arch/x86/entry/entry_64.S
+++ b/arch/x86/entry/entry_64.S
@@ -103,6 +103,16 @@ ENDPROC(native_usergs_sysret64)
/*
* 64-bit SYSCALL instruction entry. Up to 6 arguments in registers.
*
+ * This is the only entry point used for 64-bit system calls. The
+ * hardware interface is reasonably well designed and the register to
+ * argument mapping Linux uses fits well with the registers that are
+ * available when SYSCALL is used.
+ *
+ * SYSCALL instructions can be found inlined in libc implementations as
+ * well as some other programs and libraries. There are also a handful
+ * of SYSCALL instructions in the vDSO used, for example, as a
+ * clock_gettimeofday fallback.
+ *
* 64-bit SYSCALL saves rip to rcx, clears rflags.RF, then saves rflags to r11,
* then loads new ss, cs, and rip from previously programmed MSRs.
* rflags gets masked by a value from another MSR (so CLD and CLAC
@@ -145,17 +155,11 @@ GLOBAL(entry_SYSCALL_64_after_swapgs)
movq %rsp, PER_CPU_VAR(rsp_scratch)
movq PER_CPU_VAR(cpu_current_top_of_stack), %rsp
+ TRACE_IRQS_OFF
+
/* Construct struct pt_regs on stack */
pushq $__USER_DS /* pt_regs->ss */
pushq PER_CPU_VAR(rsp_scratch) /* pt_regs->sp */
- /*
- * Re-enable interrupts.
- * We use 'rsp_scratch' as a scratch space, hence irq-off block above
- * must execute atomically in the face of possible interrupt-driven
- * task preemption. We must enable interrupts only after we're done
- * with using rsp_scratch:
- */
- ENABLE_INTERRUPTS(CLBR_NONE)
pushq %r11 /* pt_regs->flags */
pushq $__USER_CS /* pt_regs->cs */
pushq %rcx /* pt_regs->ip */
@@ -171,9 +175,21 @@ GLOBAL(entry_SYSCALL_64_after_swapgs)
pushq %r11 /* pt_regs->r11 */
sub $(6*8), %rsp /* pt_regs->bp, bx, r12-15 not saved */
- testl $_TIF_WORK_SYSCALL_ENTRY, ASM_THREAD_INFO(TI_flags, %rsp, SIZEOF_PTREGS)
- jnz tracesys
+ /*
+ * If we need to do entry work or if we guess we'll need to do
+ * exit work, go straight to the slow path.
+ */
+ testl $_TIF_WORK_SYSCALL_ENTRY|_TIF_ALLWORK_MASK, ASM_THREAD_INFO(TI_flags, %rsp, SIZEOF_PTREGS)
+ jnz entry_SYSCALL64_slow_path
+
entry_SYSCALL_64_fastpath:
+ /*
+ * Easy case: enable interrupts and issue the syscall. If the syscall
+ * needs pt_regs, we'll call a stub that disables interrupts again
+ * and jumps to the slow path.
+ */
+ TRACE_IRQS_ON
+ ENABLE_INTERRUPTS(CLBR_NONE)
#if __SYSCALL_MASK == ~0
cmpq $__NR_syscall_max, %rax
#else
@@ -182,103 +198,56 @@ entry_SYSCALL_64_fastpath:
#endif
ja 1f /* return -ENOSYS (already in pt_regs->ax) */
movq %r10, %rcx
+
+ /*
+ * This call instruction is handled specially in stub_ptregs_64.
+ * It might end up jumping to the slow path. If it jumps, RAX
+ * and all argument registers are clobbered.
+ */
call *sys_call_table(, %rax, 8)
+.Lentry_SYSCALL_64_after_fastpath_call:
+
movq %rax, RAX(%rsp)
1:
-/*
- * Syscall return path ending with SYSRET (fast path).
- * Has incompletely filled pt_regs.
- */
- LOCKDEP_SYS_EXIT
- /*
- * We do not frame this tiny irq-off block with TRACE_IRQS_OFF/ON,
- * it is too small to ever cause noticeable irq latency.
- */
- DISABLE_INTERRUPTS(CLBR_NONE)
/*
- * We must check ti flags with interrupts (or at least preemption)
- * off because we must *never* return to userspace without
- * processing exit work that is enqueued if we're preempted here.
- * In particular, returning to userspace with any of the one-shot
- * flags (TIF_NOTIFY_RESUME, TIF_USER_RETURN_NOTIFY, etc) set is
- * very bad.
+ * If we get here, then we know that pt_regs is clean for SYSRET64.
+ * If we see that no exit work is required (which we are required
+ * to check with IRQs off), then we can go straight to SYSRET64.
*/
+ DISABLE_INTERRUPTS(CLBR_NONE)
+ TRACE_IRQS_OFF
testl $_TIF_ALLWORK_MASK, ASM_THREAD_INFO(TI_flags, %rsp, SIZEOF_PTREGS)
- jnz int_ret_from_sys_call_irqs_off /* Go to the slow path */
+ jnz 1f
- RESTORE_C_REGS_EXCEPT_RCX_R11
+ LOCKDEP_SYS_EXIT
+ TRACE_IRQS_ON /* user mode is traced as IRQs on */
movq RIP(%rsp), %rcx
movq EFLAGS(%rsp), %r11
+ RESTORE_C_REGS_EXCEPT_RCX_R11
movq RSP(%rsp), %rsp
- /*
- * 64-bit SYSRET restores rip from rcx,
- * rflags from r11 (but RF and VM bits are forced to 0),
- * cs and ss are loaded from MSRs.
- * Restoration of rflags re-enables interrupts.
- *
- * NB: On AMD CPUs with the X86_BUG_SYSRET_SS_ATTRS bug, the ss
- * descriptor is not reinitialized. This means that we should
- * avoid SYSRET with SS == NULL, which could happen if we schedule,
- * exit the kernel, and re-enter using an interrupt vector. (All
- * interrupt entries on x86_64 set SS to NULL.) We prevent that
- * from happening by reloading SS in __switch_to. (Actually
- * detecting the failure in 64-bit userspace is tricky but can be
- * done.)
- */
USERGS_SYSRET64
-GLOBAL(int_ret_from_sys_call_irqs_off)
+1:
+ /*
+ * The fast path looked good when we started, but something changed
+ * along the way and we need to switch to the slow path. Calling
+ * raise(3) will trigger this, for example. IRQs are off.
+ */
TRACE_IRQS_ON
ENABLE_INTERRUPTS(CLBR_NONE)
- jmp int_ret_from_sys_call
-
- /* Do syscall entry tracing */
-tracesys:
- movq %rsp, %rdi
- movl $AUDIT_ARCH_X86_64, %esi
- call syscall_trace_enter_phase1
- test %rax, %rax
- jnz tracesys_phase2 /* if needed, run the slow path */
- RESTORE_C_REGS_EXCEPT_RAX /* else restore clobbered regs */
- movq ORIG_RAX(%rsp), %rax
- jmp entry_SYSCALL_64_fastpath /* and return to the fast path */
-
-tracesys_phase2:
SAVE_EXTRA_REGS
movq %rsp, %rdi
- movl $AUDIT_ARCH_X86_64, %esi
- movq %rax, %rdx
- call syscall_trace_enter_phase2
-
- /*
- * Reload registers from stack in case ptrace changed them.
- * We don't reload %rax because syscall_trace_entry_phase2() returned
- * the value it wants us to use in the table lookup.
- */
- RESTORE_C_REGS_EXCEPT_RAX
- RESTORE_EXTRA_REGS
-#if __SYSCALL_MASK == ~0
- cmpq $__NR_syscall_max, %rax
-#else
- andl $__SYSCALL_MASK, %eax
- cmpl $__NR_syscall_max, %eax
-#endif
- ja 1f /* return -ENOSYS (already in pt_regs->ax) */
- movq %r10, %rcx /* fixup for C */
- call *sys_call_table(, %rax, 8)
- movq %rax, RAX(%rsp)
-1:
- /* Use IRET because user could have changed pt_regs->foo */
+ call syscall_return_slowpath /* returns with IRQs disabled */
+ jmp return_from_SYSCALL_64
-/*
- * Syscall return path ending with IRET.
- * Has correct iret frame.
- */
-GLOBAL(int_ret_from_sys_call)
+entry_SYSCALL64_slow_path:
+ /* IRQs are off. */
SAVE_EXTRA_REGS
movq %rsp, %rdi
- call syscall_return_slowpath /* returns with IRQs disabled */
+ call do_syscall_64 /* returns with IRQs disabled */
+
+return_from_SYSCALL_64:
RESTORE_EXTRA_REGS
TRACE_IRQS_IRETQ /* we're about to change IF */
@@ -355,83 +324,45 @@ opportunistic_sysret_failed:
jmp restore_c_regs_and_iret
END(entry_SYSCALL_64)
+ENTRY(stub_ptregs_64)
+ /*
+ * Syscalls marked as needing ptregs land here.
+ * If we are on the fast path, we need to save the extra regs,
+ * which we achieve by trying again on the slow path. If we are on
+ * the slow path, the extra regs are already saved.
+ *
+ * RAX stores a pointer to the C function implementing the syscall.
+ * IRQs are on.
+ */
+ cmpq $.Lentry_SYSCALL_64_after_fastpath_call, (%rsp)
+ jne 1f
- .macro FORK_LIKE func
-ENTRY(stub_\func)
- SAVE_EXTRA_REGS 8
- jmp sys_\func
-END(stub_\func)
- .endm
-
- FORK_LIKE clone
- FORK_LIKE fork
- FORK_LIKE vfork
-
-ENTRY(stub_execve)
- call sys_execve
-return_from_execve:
- testl %eax, %eax
- jz 1f
- /* exec failed, can use fast SYSRET code path in this case */
- ret
-1:
- /* must use IRET code path (pt_regs->cs may have changed) */
- addq $8, %rsp
- ZERO_EXTRA_REGS
- movq %rax, RAX(%rsp)
- jmp int_ret_from_sys_call
-END(stub_execve)
-/*
- * Remaining execve stubs are only 7 bytes long.
- * ENTRY() often aligns to 16 bytes, which in this case has no benefits.
- */
- .align 8
-GLOBAL(stub_execveat)
- call sys_execveat
- jmp return_from_execve
-END(stub_execveat)
-
-#if defined(CONFIG_X86_X32_ABI)
- .align 8
-GLOBAL(stub_x32_execve)
- call compat_sys_execve
- jmp return_from_execve
-END(stub_x32_execve)
- .align 8
-GLOBAL(stub_x32_execveat)
- call compat_sys_execveat
- jmp return_from_execve
-END(stub_x32_execveat)
-#endif
-
-/*
- * sigreturn is special because it needs to restore all registers on return.
- * This cannot be done with SYSRET, so use the IRET return path instead.
- */
-ENTRY(stub_rt_sigreturn)
/*
- * SAVE_EXTRA_REGS result is not normally needed:
- * sigreturn overwrites all pt_regs->GPREGS.
- * But sigreturn can fail (!), and there is no easy way to detect that.
- * To make sure RESTORE_EXTRA_REGS doesn't restore garbage on error,
- * we SAVE_EXTRA_REGS here.
+ * Called from fast path -- disable IRQs again, pop return address
+ * and jump to slow path
*/
- SAVE_EXTRA_REGS 8
- call sys_rt_sigreturn
-return_from_stub:
- addq $8, %rsp
- RESTORE_EXTRA_REGS
- movq %rax, RAX(%rsp)
- jmp int_ret_from_sys_call
-END(stub_rt_sigreturn)
+ DISABLE_INTERRUPTS(CLBR_NONE)
+ TRACE_IRQS_OFF
+ popq %rax
+ jmp entry_SYSCALL64_slow_path
-#ifdef CONFIG_X86_X32_ABI
-ENTRY(stub_x32_rt_sigreturn)
- SAVE_EXTRA_REGS 8
- call sys32_x32_rt_sigreturn
- jmp return_from_stub
-END(stub_x32_rt_sigreturn)
-#endif
+1:
+ /* Called from C */
+ jmp *%rax /* called from C */
+END(stub_ptregs_64)
+
+.macro ptregs_stub func
+ENTRY(ptregs_\func)
+ leaq \func(%rip), %rax
+ jmp stub_ptregs_64
+END(ptregs_\func)
+.endm
+
+/* Instantiate ptregs_stub for each ptregs-using syscall */
+#define __SYSCALL_64_QUAL_(sym)
+#define __SYSCALL_64_QUAL_ptregs(sym) ptregs_stub sym
+#define __SYSCALL_64(nr, sym, qual) __SYSCALL_64_QUAL_##qual(sym)
+#include
/*
* A newly forked process directly context switches into this address.
@@ -439,7 +370,6 @@ END(stub_x32_rt_sigreturn)
* rdi: prev task we switched from
*/
ENTRY(ret_from_fork)
-
LOCK ; btr $TIF_FORK, TI_flags(%r8)
pushq $0x0002
@@ -447,28 +377,32 @@ ENTRY(ret_from_fork)
call schedule_tail /* rdi: 'prev' task parameter */
- RESTORE_EXTRA_REGS
-
testb $3, CS(%rsp) /* from kernel_thread? */
+ jnz 1f
/*
- * By the time we get here, we have no idea whether our pt_regs,
- * ti flags, and ti status came from the 64-bit SYSCALL fast path,
- * the slow path, or one of the 32-bit compat paths.
- * Use IRET code path to return, since it can safely handle
- * all of the above.
+ * We came from kernel_thread. This code path is quite twisted, and
+ * someone should clean it up.
+ *
+ * copy_thread_tls stashes the function pointer in RBX and the
+ * parameter to be passed in RBP. The called function is permitted
+ * to call do_execve and thereby jump to user mode.
*/
- jnz int_ret_from_sys_call
+ movq RBP(%rsp), %rdi
+ call *RBX(%rsp)
+ movl $0, RAX(%rsp)
/*
- * We came from kernel_thread
- * nb: we depend on RESTORE_EXTRA_REGS above
+ * Fall through as though we're exiting a syscall. This makes a
+ * twisted sort of sense if we just called do_execve.
*/
- movq %rbp, %rdi
- call *%rbx
- movl $0, RAX(%rsp)
- RESTORE_EXTRA_REGS
- jmp int_ret_from_sys_call
+
+1:
+ movq %rsp, %rdi
+ call syscall_return_slowpath /* returns with IRQs disabled */
+ TRACE_IRQS_ON /* user mode is traced as IRQS on */
+ SWAPGS
+ jmp restore_regs_and_iret
END(ret_from_fork)
/*
diff --git a/arch/x86/entry/entry_64_compat.S b/arch/x86/entry/entry_64_compat.S
index 3c990eeee40bd..847f2f0c31e50 100644
--- a/arch/x86/entry/entry_64_compat.S
+++ b/arch/x86/entry/entry_64_compat.S
@@ -19,12 +19,21 @@
.section .entry.text, "ax"
/*
- * 32-bit SYSENTER instruction entry.
+ * 32-bit SYSENTER entry.
*
- * SYSENTER loads ss, rsp, cs, and rip from previously programmed MSRs.
- * IF and VM in rflags are cleared (IOW: interrupts are off).
+ * 32-bit system calls through the vDSO's __kernel_vsyscall enter here
+ * on 64-bit kernels running on Intel CPUs.
+ *
+ * The SYSENTER instruction, in principle, should *only* occur in the
+ * vDSO. In practice, a small number of Android devices were shipped
+ * with a copy of Bionic that inlined a SYSENTER instruction. This
+ * never happened in any of Google's Bionic versions -- it only happened
+ * in a narrow range of Intel-provided versions.
+ *
+ * SYSENTER loads SS, RSP, CS, and RIP from previously programmed MSRs.
+ * IF and VM in RFLAGS are cleared (IOW: interrupts are off).
* SYSENTER does not save anything on the stack,
- * and does not save old rip (!!!) and rflags.
+ * and does not save old RIP (!!!), RSP, or RFLAGS.
*
* Arguments:
* eax system call number
@@ -35,10 +44,6 @@
* edi arg5
* ebp user stack
* 0(%ebp) arg6
- *
- * This is purely a fast path. For anything complicated we use the int 0x80
- * path below. We set up a complete hardware stack frame to share code
- * with the int 0x80 path.
*/
ENTRY(entry_SYSENTER_compat)
/* Interrupts are off on entry. */
@@ -66,8 +71,6 @@ ENTRY(entry_SYSENTER_compat)
*/
pushfq /* pt_regs->flags (except IF = 0) */
orl $X86_EFLAGS_IF, (%rsp) /* Fix saved flags */
- ASM_CLAC /* Clear AC after saving FLAGS */
-
pushq $__USER32_CS /* pt_regs->cs */
xorq %r8,%r8
pushq %r8 /* pt_regs->ip = 0 (placeholder) */
@@ -90,19 +93,25 @@ ENTRY(entry_SYSENTER_compat)
cld
/*
- * Sysenter doesn't filter flags, so we need to clear NT
+ * SYSENTER doesn't filter flags, so we need to clear NT and AC
* ourselves. To save a few cycles, we can check whether
- * NT was set instead of doing an unconditional popfq.
+ * either was set instead of doing an unconditional popfq.
* This needs to happen before enabling interrupts so that
* we don't get preempted with NT set.
*
+ * If TF is set, we will single-step all the way to here -- do_debug
+ * will ignore all the traps. (Yes, this is slow, but so is
+ * single-stepping in general. This allows us to avoid having
+ * a more complicated code to handle the case where a user program
+ * forces us to single-step through the SYSENTER entry code.)
+ *
* NB.: .Lsysenter_fix_flags is a label with the code under it moved
* out-of-line as an optimization: NT is unlikely to be set in the
* majority of the cases and instead of polluting the I$ unnecessarily,
* we're keeping that code behind a branch which will predict as
* not-taken and therefore its instructions won't be fetched.
*/
- testl $X86_EFLAGS_NT, EFLAGS(%rsp)
+ testl $X86_EFLAGS_NT|X86_EFLAGS_AC|X86_EFLAGS_TF, EFLAGS(%rsp)
jnz .Lsysenter_fix_flags
.Lsysenter_flags_fixed:
@@ -123,20 +132,42 @@ ENTRY(entry_SYSENTER_compat)
pushq $X86_EFLAGS_FIXED
popfq
jmp .Lsysenter_flags_fixed
+GLOBAL(__end_entry_SYSENTER_compat)
ENDPROC(entry_SYSENTER_compat)
/*
- * 32-bit SYSCALL instruction entry.
+ * 32-bit SYSCALL entry.
+ *
+ * 32-bit system calls through the vDSO's __kernel_vsyscall enter here
+ * on 64-bit kernels running on AMD CPUs.
+ *
+ * The SYSCALL instruction, in principle, should *only* occur in the
+ * vDSO. In practice, it appears that this really is the case.
+ * As evidence:
+ *
+ * - The calling convention for SYSCALL has changed several times without
+ * anyone noticing.
*
- * 32-bit SYSCALL saves rip to rcx, clears rflags.RF, then saves rflags to r11,
- * then loads new ss, cs, and rip from previously programmed MSRs.
- * rflags gets masked by a value from another MSR (so CLD and CLAC
- * are not needed). SYSCALL does not save anything on the stack
- * and does not change rsp.
+ * - Prior to the in-kernel X86_BUG_SYSRET_SS_ATTRS fixup, anything
+ * user task that did SYSCALL without immediately reloading SS
+ * would randomly crash.
*
- * Note: rflags saving+masking-with-MSR happens only in Long mode
+ * - Most programmers do not directly target AMD CPUs, and the 32-bit
+ * SYSCALL instruction does not exist on Intel CPUs. Even on AMD
+ * CPUs, Linux disables the SYSCALL instruction on 32-bit kernels
+ * because the SYSCALL instruction in legacy/native 32-bit mode (as
+ * opposed to compat mode) is sufficiently poorly designed as to be
+ * essentially unusable.
+ *
+ * 32-bit SYSCALL saves RIP to RCX, clears RFLAGS.RF, then saves
+ * RFLAGS to R11, then loads new SS, CS, and RIP from previously
+ * programmed MSRs. RFLAGS gets masked by a value from another MSR
+ * (so CLD and CLAC are not needed). SYSCALL does not save anything on
+ * the stack and does not change RSP.
+ *
+ * Note: RFLAGS saving+masking-with-MSR happens only in Long mode
* (in legacy 32-bit mode, IF, RF and VM bits are cleared and that's it).
- * Don't get confused: rflags saving+masking depends on Long Mode Active bit
+ * Don't get confused: RFLAGS saving+masking depends on Long Mode Active bit
* (EFER.LMA=1), NOT on bitness of userspace where SYSCALL executes
* or target CS descriptor's L bit (SYSCALL does not read segment descriptors).
*
@@ -236,7 +267,21 @@ sysret32_from_system_call:
END(entry_SYSCALL_compat)
/*
- * Emulated IA32 system calls via int 0x80.
+ * 32-bit legacy system call entry.
+ *
+ * 32-bit x86 Linux system calls traditionally used the INT $0x80
+ * instruction. INT $0x80 lands here.
+ *
+ * This entry point can be used by 32-bit and 64-bit programs to perform
+ * 32-bit system calls. Instances of INT $0x80 can be found inline in
+ * various programs and libraries. It is also used by the vDSO's
+ * __kernel_vsyscall fallback for hardware that doesn't support a faster
+ * entry method. Restarted 32-bit system calls also fall back to INT
+ * $0x80 regardless of what instruction was originally used to do the
+ * system call.
+ *
+ * This is considered a slow path. It is not used by most libc
+ * implementations on modern hardware except during process startup.
*
* Arguments:
* eax system call number
@@ -245,17 +290,8 @@ END(entry_SYSCALL_compat)
* edx arg3
* esi arg4
* edi arg5
- * ebp arg6 (note: not saved in the stack frame, should not be touched)
- *
- * Notes:
- * Uses the same stack frame as the x86-64 version.
- * All registers except eax must be saved (but ptrace may violate that).
- * Arguments are zero extended. For system calls that want sign extension and
- * take long arguments a wrapper is needed. Most calls can just be called
- * directly.
- * Assumes it is only called from user space and entered with interrupts off.
+ * ebp arg6
*/
-
ENTRY(entry_INT80_compat)
/*
* Interrupts are off on entry.
@@ -300,7 +336,7 @@ ENTRY(entry_INT80_compat)
TRACE_IRQS_OFF
movq %rsp, %rdi
- call do_syscall_32_irqs_off
+ call do_int80_syscall_32
.Lsyscall_32_done:
/* Go back to user mode. */
diff --git a/arch/x86/entry/syscall_32.c b/arch/x86/entry/syscall_32.c
index 9a6649857106a..8f895ee13a1cb 100644
--- a/arch/x86/entry/syscall_32.c
+++ b/arch/x86/entry/syscall_32.c
@@ -6,17 +6,11 @@
#include
#include
-#ifdef CONFIG_IA32_EMULATION
-#define SYM(sym, compat) compat
-#else
-#define SYM(sym, compat) sym
-#endif
-
-#define __SYSCALL_I386(nr, sym, compat) extern asmlinkage long SYM(sym, compat)(unsigned long, unsigned long, unsigned long, unsigned long, unsigned long, unsigned long) ;
+#define __SYSCALL_I386(nr, sym, qual) extern asmlinkage long sym(unsigned long, unsigned long, unsigned long, unsigned long, unsigned long, unsigned long) ;
#include
#undef __SYSCALL_I386
-#define __SYSCALL_I386(nr, sym, compat) [nr] = SYM(sym, compat),
+#define __SYSCALL_I386(nr, sym, qual) [nr] = sym,
extern asmlinkage long sys_ni_syscall(unsigned long, unsigned long, unsigned long, unsigned long, unsigned long, unsigned long);
diff --git a/arch/x86/entry/syscall_64.c b/arch/x86/entry/syscall_64.c
index 41283d22be7a9..9dbc5abb6162f 100644
--- a/arch/x86/entry/syscall_64.c
+++ b/arch/x86/entry/syscall_64.c
@@ -6,19 +6,14 @@
#include
#include
-#define __SYSCALL_COMMON(nr, sym, compat) __SYSCALL_64(nr, sym, compat)
+#define __SYSCALL_64_QUAL_(sym) sym
+#define __SYSCALL_64_QUAL_ptregs(sym) ptregs_##sym
-#ifdef CONFIG_X86_X32_ABI
-# define __SYSCALL_X32(nr, sym, compat) __SYSCALL_64(nr, sym, compat)
-#else
-# define __SYSCALL_X32(nr, sym, compat) /* nothing */
-#endif
-
-#define __SYSCALL_64(nr, sym, compat) extern asmlinkage long sym(unsigned long, unsigned long, unsigned long, unsigned long, unsigned long, unsigned long) ;
+#define __SYSCALL_64(nr, sym, qual) extern asmlinkage long __SYSCALL_64_QUAL_##qual(sym)(unsigned long, unsigned long, unsigned long, unsigned long, unsigned long, unsigned long);
#include
#undef __SYSCALL_64
-#define __SYSCALL_64(nr, sym, compat) [nr] = sym,
+#define __SYSCALL_64(nr, sym, qual) [nr] = __SYSCALL_64_QUAL_##qual(sym),
extern long sys_ni_syscall(unsigned long, unsigned long, unsigned long, unsigned long, unsigned long, unsigned long);
diff --git a/arch/x86/entry/syscalls/syscall_64.tbl b/arch/x86/entry/syscalls/syscall_64.tbl
index dc1040a50bdc2..2e5b565adacc5 100644
--- a/arch/x86/entry/syscalls/syscall_64.tbl
+++ b/arch/x86/entry/syscalls/syscall_64.tbl
@@ -21,7 +21,7 @@
12 common brk sys_brk
13 64 rt_sigaction sys_rt_sigaction
14 common rt_sigprocmask sys_rt_sigprocmask
-15 64 rt_sigreturn stub_rt_sigreturn
+15 64 rt_sigreturn sys_rt_sigreturn/ptregs
16 64 ioctl sys_ioctl
17 common pread64 sys_pread64
18 common pwrite64 sys_pwrite64
@@ -62,10 +62,10 @@
53 common socketpair sys_socketpair
54 64 setsockopt sys_setsockopt
55 64 getsockopt sys_getsockopt
-56 common clone stub_clone
-57 common fork stub_fork
-58 common vfork stub_vfork
-59 64 execve stub_execve
+56 common clone sys_clone/ptregs
+57 common fork sys_fork/ptregs
+58 common vfork sys_vfork/ptregs
+59 64 execve sys_execve/ptregs
60 common exit sys_exit
61 common wait4 sys_wait4
62 common kill sys_kill
@@ -178,7 +178,7 @@
169 common reboot sys_reboot
170 common sethostname sys_sethostname
171 common setdomainname sys_setdomainname
-172 common iopl sys_iopl
+172 common iopl sys_iopl/ptregs
173 common ioperm sys_ioperm
174 64 create_module
175 common init_module sys_init_module
@@ -328,7 +328,7 @@
319 common memfd_create sys_memfd_create
320 common kexec_file_load sys_kexec_file_load
321 common bpf sys_bpf
-322 64 execveat stub_execveat
+322 64 execveat sys_execveat/ptregs
323 common userfaultfd sys_userfaultfd
324 common membarrier sys_membarrier
325 common mlock2 sys_mlock2
@@ -339,14 +339,14 @@
# for native 64-bit operation.
#
512 x32 rt_sigaction compat_sys_rt_sigaction
-513 x32 rt_sigreturn stub_x32_rt_sigreturn
+513 x32 rt_sigreturn sys32_x32_rt_sigreturn
514 x32 ioctl compat_sys_ioctl
515 x32 readv compat_sys_readv
516 x32 writev compat_sys_writev
517 x32 recvfrom compat_sys_recvfrom
518 x32 sendmsg compat_sys_sendmsg
519 x32 recvmsg compat_sys_recvmsg
-520 x32 execve stub_x32_execve
+520 x32 execve compat_sys_execve/ptregs
521 x32 ptrace compat_sys_ptrace
522 x32 rt_sigpending compat_sys_rt_sigpending
523 x32 rt_sigtimedwait compat_sys_rt_sigtimedwait
@@ -371,4 +371,4 @@
542 x32 getsockopt compat_sys_getsockopt
543 x32 io_setup compat_sys_io_setup
544 x32 io_submit compat_sys_io_submit
-545 x32 execveat stub_x32_execveat
+545 x32 execveat compat_sys_execveat/ptregs
diff --git a/arch/x86/entry/syscalls/syscalltbl.sh b/arch/x86/entry/syscalls/syscalltbl.sh
index 0e7f8ec071e76..cd3d3015d7df8 100644
--- a/arch/x86/entry/syscalls/syscalltbl.sh
+++ b/arch/x86/entry/syscalls/syscalltbl.sh
@@ -3,13 +3,63 @@
in="$1"
out="$2"
+syscall_macro() {
+ abi="$1"
+ nr="$2"
+ entry="$3"
+
+ # Entry can be either just a function name or "function/qualifier"
+ real_entry="${entry%%/*}"
+ qualifier="${entry:${#real_entry}}" # Strip the function name
+ qualifier="${qualifier:1}" # Strip the slash, if any
+
+ echo "__SYSCALL_${abi}($nr, $real_entry, $qualifier)"
+}
+
+emit() {
+ abi="$1"
+ nr="$2"
+ entry="$3"
+ compat="$4"
+
+ if [ "$abi" == "64" -a -n "$compat" ]; then
+ echo "a compat entry for a 64-bit syscall makes no sense" >&2
+ exit 1
+ fi
+
+ if [ -z "$compat" ]; then
+ if [ -n "$entry" ]; then
+ syscall_macro "$abi" "$nr" "$entry"
+ fi
+ else
+ echo "#ifdef CONFIG_X86_32"
+ if [ -n "$entry" ]; then
+ syscall_macro "$abi" "$nr" "$entry"
+ fi
+ echo "#else"
+ syscall_macro "$abi" "$nr" "$compat"
+ echo "#endif"
+ fi
+}
+
grep '^[0-9]' "$in" | sort -n | (
while read nr abi name entry compat; do
abi=`echo "$abi" | tr '[a-z]' '[A-Z]'`
- if [ -n "$compat" ]; then
- echo "__SYSCALL_${abi}($nr, $entry, $compat)"
- elif [ -n "$entry" ]; then
- echo "__SYSCALL_${abi}($nr, $entry, $entry)"
+ if [ "$abi" == "COMMON" -o "$abi" == "64" ]; then
+ # COMMON is the same as 64, except that we don't expect X32
+ # programs to use it. Our expectation has nothing to do with
+ # any generated code, so treat them the same.
+ emit 64 "$nr" "$entry" "$compat"
+ elif [ "$abi" == "X32" ]; then
+ # X32 is equivalent to 64 on an X32-compatible kernel.
+ echo "#ifdef CONFIG_X86_X32_ABI"
+ emit 64 "$nr" "$entry" "$compat"
+ echo "#endif"
+ elif [ "$abi" == "I386" ]; then
+ emit "$abi" "$nr" "$entry" "$compat"
+ else
+ echo "Unknown abi $abi" >&2
+ exit 1
fi
done
) > "$out"
diff --git a/arch/x86/entry/vdso/vdso2c.h b/arch/x86/entry/vdso/vdso2c.h
index 0224987556ce8..63a03bb91497e 100644
--- a/arch/x86/entry/vdso/vdso2c.h
+++ b/arch/x86/entry/vdso/vdso2c.h
@@ -140,7 +140,7 @@ static void BITSFUNC(go)(void *raw_addr, size_t raw_len,
fprintf(outfile, "#include \n");
fprintf(outfile, "\n");
fprintf(outfile,
- "static unsigned char raw_data[%lu] __page_aligned_data = {",
+ "static unsigned char raw_data[%lu] __ro_after_init __aligned(PAGE_SIZE) = {",
mapping_size);
for (j = 0; j < stripped_len; j++) {
if (j % 10 == 0)
@@ -150,16 +150,9 @@ static void BITSFUNC(go)(void *raw_addr, size_t raw_len,
}
fprintf(outfile, "\n};\n\n");
- fprintf(outfile, "static struct page *pages[%lu];\n\n",
- mapping_size / 4096);
-
fprintf(outfile, "const struct vdso_image %s = {\n", name);
fprintf(outfile, "\t.data = raw_data,\n");
fprintf(outfile, "\t.size = %lu,\n", mapping_size);
- fprintf(outfile, "\t.text_mapping = {\n");
- fprintf(outfile, "\t\t.name = \"[vdso]\",\n");
- fprintf(outfile, "\t\t.pages = pages,\n");
- fprintf(outfile, "\t},\n");
if (alt_sec) {
fprintf(outfile, "\t.alt = %lu,\n",
(unsigned long)GET_LE(&alt_sec->sh_offset));
diff --git a/arch/x86/entry/vdso/vdso32-setup.c b/arch/x86/entry/vdso/vdso32-setup.c
index 08a317a9ae4b5..7853b53959cd3 100644
--- a/arch/x86/entry/vdso/vdso32-setup.c
+++ b/arch/x86/entry/vdso/vdso32-setup.c
@@ -11,7 +11,6 @@
#include
#include
-#include
#include
#include
diff --git a/arch/x86/entry/vdso/vdso32/system_call.S b/arch/x86/entry/vdso/vdso32/system_call.S
index 3a1d9297074bc..0109ac6cb79cc 100644
--- a/arch/x86/entry/vdso/vdso32/system_call.S
+++ b/arch/x86/entry/vdso/vdso32/system_call.S
@@ -3,7 +3,7 @@
*/
#include
-#include
+#include
#include
/*
diff --git a/arch/x86/entry/vdso/vma.c b/arch/x86/entry/vdso/vma.c
index b8f69e264ac41..10f7045849226 100644
--- a/arch/x86/entry/vdso/vma.c
+++ b/arch/x86/entry/vdso/vma.c
@@ -20,6 +20,7 @@
#include
#include
#include
+#include
#if defined(CONFIG_X86_64)
unsigned int __read_mostly vdso64_enabled = 1;
@@ -27,13 +28,7 @@ unsigned int __read_mostly vdso64_enabled = 1;
void __init init_vdso_image(const struct vdso_image *image)
{
- int i;
- int npages = (image->size) / PAGE_SIZE;
-
BUG_ON(image->size % PAGE_SIZE != 0);
- for (i = 0; i < npages; i++)
- image->text_mapping.pages[i] =
- virt_to_page(image->data + i*PAGE_SIZE);
apply_alternatives((struct alt_instr *)(image->data + image->alt),
(struct alt_instr *)(image->data + image->alt +
@@ -90,18 +85,87 @@ static unsigned long vdso_addr(unsigned long start, unsigned len)
#endif
}
+static int vdso_fault(const struct vm_special_mapping *sm,
+ struct vm_area_struct *vma, struct vm_fault *vmf)
+{
+ const struct vdso_image *image = vma->vm_mm->context.vdso_image;
+
+ if (!image || (vmf->pgoff << PAGE_SHIFT) >= image->size)
+ return VM_FAULT_SIGBUS;
+
+ vmf->page = virt_to_page(image->data + (vmf->pgoff << PAGE_SHIFT));
+ get_page(vmf->page);
+ return 0;
+}
+
+static const struct vm_special_mapping text_mapping = {
+ .name = "[vdso]",
+ .fault = vdso_fault,
+};
+
+static int vvar_fault(const struct vm_special_mapping *sm,
+ struct vm_area_struct *vma, struct vm_fault *vmf)
+{
+ const struct vdso_image *image = vma->vm_mm->context.vdso_image;
+ long sym_offset;
+ int ret = -EFAULT;
+
+ if (!image)
+ return VM_FAULT_SIGBUS;
+
+ sym_offset = (long)(vmf->pgoff << PAGE_SHIFT) +
+ image->sym_vvar_start;
+
+ /*
+ * Sanity check: a symbol offset of zero means that the page
+ * does not exist for this vdso image, not that the page is at
+ * offset zero relative to the text mapping. This should be
+ * impossible here, because sym_offset should only be zero for
+ * the page past the end of the vvar mapping.
+ */
+ if (sym_offset == 0)
+ return VM_FAULT_SIGBUS;
+
+ if (sym_offset == image->sym_vvar_page) {
+ ret = vm_insert_pfn(vma, (unsigned long)vmf->virtual_address,
+ __pa_symbol(&__vvar_page) >> PAGE_SHIFT);
+ } else if (sym_offset == image->sym_hpet_page) {
+#ifdef CONFIG_HPET_TIMER
+ if (hpet_address && vclock_was_used(VCLOCK_HPET)) {
+ ret = vm_insert_pfn_prot(
+ vma,
+ (unsigned long)vmf->virtual_address,
+ hpet_address >> PAGE_SHIFT,
+ pgprot_noncached(PAGE_READONLY));
+ }
+#endif
+ } else if (sym_offset == image->sym_pvclock_page) {
+ struct pvclock_vsyscall_time_info *pvti =
+ pvclock_pvti_cpu0_va();
+ if (pvti && vclock_was_used(VCLOCK_PVCLOCK)) {
+ ret = vm_insert_pfn(
+ vma,
+ (unsigned long)vmf->virtual_address,
+ __pa(pvti) >> PAGE_SHIFT);
+ }
+ }
+
+ if (ret == 0 || ret == -EBUSY)
+ return VM_FAULT_NOPAGE;
+
+ return VM_FAULT_SIGBUS;
+}
+
static int map_vdso(const struct vdso_image *image, bool calculate_addr)
{
struct mm_struct *mm = current->mm;
struct vm_area_struct *vma;
unsigned long addr, text_start;
int ret = 0;
- static struct page *no_pages[] = {NULL};
- static struct vm_special_mapping vvar_mapping = {
+ static const struct vm_special_mapping vvar_mapping = {
.name = "[vvar]",
- .pages = no_pages,
+ .fault = vvar_fault,
};
- struct pvclock_vsyscall_time_info *pvti;
if (calculate_addr) {
addr = vdso_addr(current->mm->start_stack,
@@ -121,6 +185,7 @@ static int map_vdso(const struct vdso_image *image, bool calculate_addr)
text_start = addr - image->sym_vvar_start;
current->mm->context.vdso = (void __user *)text_start;
+ current->mm->context.vdso_image = image;
/*
* MAYWRITE to allow gdb to COW and set breakpoints
@@ -130,7 +195,7 @@ static int map_vdso(const struct vdso_image *image, bool calculate_addr)
image->size,
VM_READ|VM_EXEC|
VM_MAYREAD|VM_MAYWRITE|VM_MAYEXEC,
- &image->text_mapping);
+ &text_mapping);
if (IS_ERR(vma)) {
ret = PTR_ERR(vma);
@@ -140,7 +205,8 @@ static int map_vdso(const struct vdso_image *image, bool calculate_addr)
vma = _install_special_mapping(mm,
addr,
-image->sym_vvar_start,
- VM_READ|VM_MAYREAD,
+ VM_READ|VM_MAYREAD|VM_IO|VM_DONTDUMP|
+ VM_PFNMAP,
&vvar_mapping);
if (IS_ERR(vma)) {
@@ -148,41 +214,6 @@ static int map_vdso(const struct vdso_image *image, bool calculate_addr)
goto up_fail;
}
- if (image->sym_vvar_page)
- ret = remap_pfn_range(vma,
- text_start + image->sym_vvar_page,
- __pa_symbol(&__vvar_page) >> PAGE_SHIFT,
- PAGE_SIZE,
- PAGE_READONLY);
-
- if (ret)
- goto up_fail;
-
-#ifdef CONFIG_HPET_TIMER
- if (hpet_address && image->sym_hpet_page) {
- ret = io_remap_pfn_range(vma,
- text_start + image->sym_hpet_page,
- hpet_address >> PAGE_SHIFT,
- PAGE_SIZE,
- pgprot_noncached(PAGE_READONLY));
-
- if (ret)
- goto up_fail;
- }
-#endif
-
- pvti = pvclock_pvti_cpu0_va();
- if (pvti && image->sym_pvclock_page) {
- ret = remap_pfn_range(vma,
- text_start + image->sym_pvclock_page,
- __pa(pvti) >> PAGE_SHIFT,
- PAGE_SIZE,
- PAGE_READONLY);
-
- if (ret)
- goto up_fail;
- }
-
up_fail:
if (ret)
current->mm->context.vdso = NULL;
@@ -254,7 +285,7 @@ static void vgetcpu_cpu_init(void *arg)
#ifdef CONFIG_NUMA
node = cpu_to_node(cpu);
#endif
- if (cpu_has(&cpu_data(cpu), X86_FEATURE_RDTSCP))
+ if (static_cpu_has(X86_FEATURE_RDTSCP))
write_rdtscp_aux((node << 12) | cpu);
/*
diff --git a/arch/x86/entry/vsyscall/vsyscall_gtod.c b/arch/x86/entry/vsyscall/vsyscall_gtod.c
index 51e3304169951..0fb3a104ac626 100644
--- a/arch/x86/entry/vsyscall/vsyscall_gtod.c
+++ b/arch/x86/entry/vsyscall/vsyscall_gtod.c
@@ -16,6 +16,8 @@
#include
#include
+int vclocks_used __read_mostly;
+
DEFINE_VVAR(struct vsyscall_gtod_data, vsyscall_gtod_data);
void update_vsyscall_tz(void)
@@ -26,12 +28,17 @@ void update_vsyscall_tz(void)
void update_vsyscall(struct timekeeper *tk)
{
+ int vclock_mode = tk->tkr_mono.clock->archdata.vclock_mode;
struct vsyscall_gtod_data *vdata = &vsyscall_gtod_data;
+ /* Mark the new vclock used. */
+ BUILD_BUG_ON(VCLOCK_MAX >= 32);
+ WRITE_ONCE(vclocks_used, READ_ONCE(vclocks_used) | (1 << vclock_mode));
+
gtod_write_begin(vdata);
/* copy vsyscall data */
- vdata->vclock_mode = tk->tkr_mono.clock->archdata.vclock_mode;
+ vdata->vclock_mode = vclock_mode;
vdata->cycle_last = tk->tkr_mono.cycle_last;
vdata->mask = tk->tkr_mono.mask;
vdata->mult = tk->tkr_mono.mult;
diff --git a/arch/x86/events/Makefile b/arch/x86/events/Makefile
new file mode 100644
index 0000000000000..fdfea1511cc0e
--- /dev/null
+++ b/arch/x86/events/Makefile
@@ -0,0 +1,13 @@
+obj-y += core.o
+
+obj-$(CONFIG_CPU_SUP_AMD) += amd/core.o amd/uncore.o
+obj-$(CONFIG_X86_LOCAL_APIC) += amd/ibs.o msr.o
+ifdef CONFIG_AMD_IOMMU
+obj-$(CONFIG_CPU_SUP_AMD) += amd/iommu.o
+endif
+obj-$(CONFIG_CPU_SUP_INTEL) += intel/core.o intel/bts.o intel/cqm.o
+obj-$(CONFIG_CPU_SUP_INTEL) += intel/cstate.o intel/ds.o intel/knc.o
+obj-$(CONFIG_CPU_SUP_INTEL) += intel/lbr.o intel/p4.o intel/p6.o intel/pt.o
+obj-$(CONFIG_CPU_SUP_INTEL) += intel/rapl.o msr.o
+obj-$(CONFIG_PERF_EVENTS_INTEL_UNCORE) += intel/uncore.o intel/uncore_nhmex.o
+obj-$(CONFIG_PERF_EVENTS_INTEL_UNCORE) += intel/uncore_snb.o intel/uncore_snbep.o
diff --git a/arch/x86/kernel/cpu/perf_event_amd.c b/arch/x86/events/amd/core.c
similarity index 99%
rename from arch/x86/kernel/cpu/perf_event_amd.c
rename to arch/x86/events/amd/core.c
index 58610539b0486..049ada8d4e9c9 100644
--- a/arch/x86/kernel/cpu/perf_event_amd.c
+++ b/arch/x86/events/amd/core.c
@@ -5,7 +5,7 @@
#include
#include
-#include "perf_event.h"
+#include "../perf_event.h"
static __initconst const u64 amd_hw_cache_event_ids
[PERF_COUNT_HW_CACHE_MAX]
diff --git a/arch/x86/kernel/cpu/perf_event_amd_ibs.c b/arch/x86/events/amd/ibs.c
similarity index 98%
rename from arch/x86/kernel/cpu/perf_event_amd_ibs.c
rename to arch/x86/events/amd/ibs.c
index 989d3c215d2bc..51087c29b2c23 100644
--- a/arch/x86/kernel/cpu/perf_event_amd_ibs.c
+++ b/arch/x86/events/amd/ibs.c
@@ -14,7 +14,7 @@
#include
-#include "perf_event.h"
+#include "../perf_event.h"
static u32 ibs_caps;
@@ -670,7 +670,7 @@ static __init int perf_event_ibs_init(void)
perf_ibs_pmu_init(&perf_ibs_op, "ibs_op");
register_nmi_handler(NMI_LOCAL, perf_ibs_nmi_handler, 0, "perf_ibs");
- printk(KERN_INFO "perf: AMD IBS detected (0x%08x)\n", ibs_caps);
+ pr_info("perf: AMD IBS detected (0x%08x)\n", ibs_caps);
return 0;
}
@@ -774,14 +774,14 @@ static int setup_ibs_ctl(int ibs_eilvt_off)
pci_read_config_dword(cpu_cfg, IBSCTL, &value);
if (value != (ibs_eilvt_off | IBSCTL_LVT_OFFSET_VALID)) {
pci_dev_put(cpu_cfg);
- printk(KERN_DEBUG "Failed to setup IBS LVT offset, "
- "IBSCTL = 0x%08x\n", value);
+ pr_debug("Failed to setup IBS LVT offset, IBSCTL = 0x%08x\n",
+ value);
return -EINVAL;
}
} while (1);
if (!nodes) {
- printk(KERN_DEBUG "No CPU node configured for IBS\n");
+ pr_debug("No CPU node configured for IBS\n");
return -ENODEV;
}
@@ -810,7 +810,7 @@ static void force_ibs_eilvt_setup(void)
preempt_enable();
if (offset == APIC_EILVT_NR_MAX) {
- printk(KERN_DEBUG "No EILVT entry available\n");
+ pr_debug("No EILVT entry available\n");
return;
}
diff --git a/arch/x86/kernel/cpu/perf_event_amd_iommu.c b/arch/x86/events/amd/iommu.c
similarity index 99%
rename from arch/x86/kernel/cpu/perf_event_amd_iommu.c
rename to arch/x86/events/amd/iommu.c
index 97242a9242bdf..635e5eba0caf8 100644
--- a/arch/x86/kernel/cpu/perf_event_amd_iommu.c
+++ b/arch/x86/events/amd/iommu.c
@@ -16,8 +16,8 @@
#include
#include
-#include "perf_event.h"
-#include "perf_event_amd_iommu.h"
+#include "../perf_event.h"
+#include "iommu.h"
#define COUNTER_SHIFT 16
diff --git a/arch/x86/kernel/cpu/perf_event_amd_iommu.h b/arch/x86/events/amd/iommu.h
similarity index 100%
rename from arch/x86/kernel/cpu/perf_event_amd_iommu.h
rename to arch/x86/events/amd/iommu.h
diff --git a/arch/x86/kernel/cpu/perf_event_amd_uncore.c b/arch/x86/events/amd/uncore.c
similarity index 99%
rename from arch/x86/kernel/cpu/perf_event_amd_uncore.c
rename to arch/x86/events/amd/uncore.c
index 8836fc9fa84b0..3db9569e658c8 100644
--- a/arch/x86/kernel/cpu/perf_event_amd_uncore.c
+++ b/arch/x86/events/amd/uncore.c
@@ -538,7 +538,7 @@ static int __init amd_uncore_init(void)
if (ret)
goto fail_nb;
- printk(KERN_INFO "perf: AMD NB counters detected\n");
+ pr_info("perf: AMD NB counters detected\n");
ret = 0;
}
@@ -552,7 +552,7 @@ static int __init amd_uncore_init(void)
if (ret)
goto fail_l2;
- printk(KERN_INFO "perf: AMD L2I counters detected\n");
+ pr_info("perf: AMD L2I counters detected\n");
ret = 0;
}
diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/events/core.c
similarity index 98%
rename from arch/x86/kernel/cpu/perf_event.c
rename to arch/x86/events/core.c
index 1b443db2db500..5e830d0c95c99 100644
--- a/arch/x86/kernel/cpu/perf_event.c
+++ b/arch/x86/events/core.c
@@ -254,15 +254,16 @@ static bool check_hw_exists(void)
* We still allow the PMU driver to operate:
*/
if (bios_fail) {
- printk(KERN_CONT "Broken BIOS detected, complain to your hardware vendor.\n");
- printk(KERN_ERR FW_BUG "the BIOS has corrupted hw-PMU resources (MSR %x is %Lx)\n", reg_fail, val_fail);
+ pr_cont("Broken BIOS detected, complain to your hardware vendor.\n");
+ pr_err(FW_BUG "the BIOS has corrupted hw-PMU resources (MSR %x is %Lx)\n",
+ reg_fail, val_fail);
}
return true;
msr_fail:
- printk(KERN_CONT "Broken PMU hardware detected, using software events only.\n");
- printk("%sFailed to access perfctr msr (MSR %x is %Lx)\n",
+ pr_cont("Broken PMU hardware detected, using software events only.\n");
+ pr_info("%sFailed to access perfctr msr (MSR %x is %Lx)\n",
boot_cpu_has(X86_FEATURE_HYPERVISOR) ? KERN_INFO : KERN_ERR,
reg, val_new);
@@ -596,6 +597,19 @@ void x86_pmu_disable_all(void)
}
}
+/*
+ * There may be PMI landing after enabled=0. The PMI hitting could be before or
+ * after disable_all.
+ *
+ * If PMI hits before disable_all, the PMU will be disabled in the NMI handler.
+ * It will not be re-enabled in the NMI handler again, because enabled=0. After
+ * handling the NMI, disable_all will be called, which will not change the
+ * state either. If PMI hits after disable_all, the PMU is already disabled
+ * before entering NMI handler. The NMI handler will not change the state
+ * either.
+ *
+ * So either situation is harmless.
+ */
static void x86_pmu_disable(struct pmu *pmu)
{
struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
diff --git a/arch/x86/kernel/cpu/perf_event_intel_bts.c b/arch/x86/events/intel/bts.c
similarity index 99%
rename from arch/x86/kernel/cpu/perf_event_intel_bts.c
rename to arch/x86/events/intel/bts.c
index 2cad71d1b14cf..b99dc9258c0f9 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_bts.c
+++ b/arch/x86/events/intel/bts.c
@@ -26,7 +26,7 @@
#include
#include
-#include "perf_event.h"
+#include "../perf_event.h"
struct bts_ctx {
struct perf_output_handle handle;
diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/events/intel/core.c
similarity index 98%
rename from arch/x86/kernel/cpu/perf_event_intel.c
rename to arch/x86/events/intel/core.c
index fed2ab1f1065d..68fa55b4d42e6 100644
--- a/arch/x86/kernel/cpu/perf_event_intel.c
+++ b/arch/x86/events/intel/core.c
@@ -18,7 +18,7 @@
#include
#include
-#include "perf_event.h"
+#include "../perf_event.h"
/*
* Intel PerfMon, used on Core and later.
@@ -1502,7 +1502,15 @@ static __initconst const u64 knl_hw_cache_extra_regs
};
/*
- * Use from PMIs where the LBRs are already disabled.
+ * Used from PMIs where the LBRs are already disabled.
+ *
+ * This function could be called consecutively. It is required to remain in
+ * disabled state if called consecutively.
+ *
+ * During consecutive calls, the same disable value will be written to related
+ * registers, so the PMU state remains unchanged. hw.state in
+ * intel_bts_disable_local will remain PERF_HES_STOPPED too in consecutive
+ * calls.
*/
static void __intel_pmu_disable_all(void)
{
@@ -1884,6 +1892,16 @@ static int intel_pmu_handle_irq(struct pt_regs *regs)
if (__test_and_clear_bit(62, (unsigned long *)&status)) {
handled++;
x86_pmu.drain_pebs(regs);
+ /*
+ * There are cases where, even though, the PEBS ovfl bit is set
+ * in GLOBAL_OVF_STATUS, the PEBS events may also have their
+ * overflow bits set for their counters. We must clear them
+ * here because they have been processed as exact samples in
+ * the drain_pebs() routine. They must not be processed again
+ * in the for_each_bit_set() loop for regular samples below.
+ */
+ status &= ~cpuc->pebs_enabled;
+ status &= x86_pmu.intel_ctrl | GLOBAL_STATUS_TRACE_TOPAPMI;
}
/*
@@ -1929,7 +1947,10 @@ static int intel_pmu_handle_irq(struct pt_regs *regs)
goto again;
done:
- __intel_pmu_enable_all(0, true);
+ /* Only restore PMU state when it's active. See x86_pmu_disable(). */
+ if (cpuc->enabled)
+ __intel_pmu_enable_all(0, true);
+
/*
* Only unmask the NMI after the overflow counters
* have been reset. This avoids spurious NMIs on
@@ -3396,6 +3417,7 @@ __init int intel_pmu_init(void)
intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_BACKEND] =
X86_CONFIG(.event=0xb1, .umask=0x3f, .inv=1, .cmask=1);
+ intel_pmu_pebs_data_source_nhm();
x86_add_quirk(intel_nehalem_quirk);
pr_cont("Nehalem events, ");
@@ -3459,6 +3481,7 @@ __init int intel_pmu_init(void)
intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_BACKEND] =
X86_CONFIG(.event=0xb1, .umask=0x3f, .inv=1, .cmask=1);
+ intel_pmu_pebs_data_source_nhm();
pr_cont("Westmere events, ");
break;
@@ -3581,7 +3604,7 @@ __init int intel_pmu_init(void)
intel_pmu_lbr_init_hsw();
x86_pmu.event_constraints = intel_bdw_event_constraints;
- x86_pmu.pebs_constraints = intel_hsw_pebs_event_constraints;
+ x86_pmu.pebs_constraints = intel_bdw_pebs_event_constraints;
x86_pmu.extra_regs = intel_snbep_extra_regs;
x86_pmu.pebs_aliases = intel_pebs_aliases_ivb;
x86_pmu.pebs_prec_dist = true;
diff --git a/arch/x86/kernel/cpu/perf_event_intel_cqm.c b/arch/x86/events/intel/cqm.c
similarity index 98%
rename from arch/x86/kernel/cpu/perf_event_intel_cqm.c
rename to arch/x86/events/intel/cqm.c
index a316ca96f1b63..93cb412a55792 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_cqm.c
+++ b/arch/x86/events/intel/cqm.c
@@ -7,7 +7,7 @@
#include
#include
#include
-#include "perf_event.h"
+#include "../perf_event.h"
#define MSR_IA32_PQR_ASSOC 0x0c8f
#define MSR_IA32_QM_CTR 0x0c8e
@@ -1244,15 +1244,12 @@ static struct pmu intel_cqm_pmu = {
static inline void cqm_pick_event_reader(int cpu)
{
- int phys_id = topology_physical_package_id(cpu);
- int i;
+ int reader;
- for_each_cpu(i, &cqm_cpumask) {
- if (phys_id == topology_physical_package_id(i))
- return; /* already got reader for this socket */
- }
-
- cpumask_set_cpu(cpu, &cqm_cpumask);
+ /* First online cpu in package becomes the reader */
+ reader = cpumask_any_and(&cqm_cpumask, topology_core_cpumask(cpu));
+ if (reader >= nr_cpu_ids)
+ cpumask_set_cpu(cpu, &cqm_cpumask);
}
static void intel_cqm_cpu_starting(unsigned int cpu)
@@ -1270,24 +1267,17 @@ static void intel_cqm_cpu_starting(unsigned int cpu)
static void intel_cqm_cpu_exit(unsigned int cpu)
{
- int phys_id = topology_physical_package_id(cpu);
- int i;
+ int target;
- /*
- * Is @cpu a designated cqm reader?
- */
+ /* Is @cpu the current cqm reader for this package ? */
if (!cpumask_test_and_clear_cpu(cpu, &cqm_cpumask))
return;
- for_each_online_cpu(i) {
- if (i == cpu)
- continue;
+ /* Find another online reader in this package */
+ target = cpumask_any_but(topology_core_cpumask(cpu), cpu);
- if (phys_id == topology_physical_package_id(i)) {
- cpumask_set_cpu(i, &cqm_cpumask);
- break;
- }
- }
+ if (target < nr_cpu_ids)
+ cpumask_set_cpu(target, &cqm_cpumask);
}
static int intel_cqm_cpu_notifier(struct notifier_block *nb,
diff --git a/arch/x86/kernel/cpu/perf_event_intel_cstate.c b/arch/x86/events/intel/cstate.c
similarity index 99%
rename from arch/x86/kernel/cpu/perf_event_intel_cstate.c
rename to arch/x86/events/intel/cstate.c
index 75a38b5a2e26a..7946c4231169f 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_cstate.c
+++ b/arch/x86/events/intel/cstate.c
@@ -89,7 +89,7 @@
#include
#include
#include
-#include "perf_event.h"
+#include "../perf_event.h"
#define DEFINE_CSTATE_FORMAT_ATTR(_var, _name, _format) \
static ssize_t __cstate_##_var##_show(struct kobject *kobj, \
diff --git a/arch/x86/kernel/cpu/perf_event_intel_ds.c b/arch/x86/events/intel/ds.c
similarity index 93%
rename from arch/x86/kernel/cpu/perf_event_intel_ds.c
rename to arch/x86/events/intel/ds.c
index 10602f0a438fd..ce7211a07c0b8 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_ds.c
+++ b/arch/x86/events/intel/ds.c
@@ -5,7 +5,7 @@
#include
#include
-#include "perf_event.h"
+#include "../perf_event.h"
/* The size of a BTS record in bytes: */
#define BTS_RECORD_SIZE 24
@@ -51,7 +51,8 @@ union intel_x86_pebs_dse {
#define OP_LH (P(OP, LOAD) | P(LVL, HIT))
#define SNOOP_NONE_MISS (P(SNOOP, NONE) | P(SNOOP, MISS))
-static const u64 pebs_data_source[] = {
+/* Version for Sandy Bridge and later */
+static u64 pebs_data_source[] = {
P(OP, LOAD) | P(LVL, MISS) | P(LVL, L3) | P(SNOOP, NA),/* 0x00:ukn L3 */
OP_LH | P(LVL, L1) | P(SNOOP, NONE), /* 0x01: L1 local */
OP_LH | P(LVL, LFB) | P(SNOOP, NONE), /* 0x02: LFB hit */
@@ -70,6 +71,14 @@ static const u64 pebs_data_source[] = {
OP_LH | P(LVL, UNC) | P(SNOOP, NONE), /* 0x0f: uncached */
};
+/* Patch up minor differences in the bits */
+void __init intel_pmu_pebs_data_source_nhm(void)
+{
+ pebs_data_source[0x05] = OP_LH | P(LVL, L3) | P(SNOOP, HIT);
+ pebs_data_source[0x06] = OP_LH | P(LVL, L3) | P(SNOOP, HITM);
+ pebs_data_source[0x07] = OP_LH | P(LVL, L3) | P(SNOOP, HITM);
+}
+
static u64 precise_store_data(u64 status)
{
union intel_x86_pebs_dse dse;
@@ -269,7 +278,7 @@ static int alloc_pebs_buffer(int cpu)
if (!x86_pmu.pebs)
return 0;
- buffer = kzalloc_node(PEBS_BUFFER_SIZE, GFP_KERNEL, node);
+ buffer = kzalloc_node(x86_pmu.pebs_buffer_size, GFP_KERNEL, node);
if (unlikely(!buffer))
return -ENOMEM;
@@ -286,7 +295,7 @@ static int alloc_pebs_buffer(int cpu)
per_cpu(insn_buffer, cpu) = ibuffer;
}
- max = PEBS_BUFFER_SIZE / x86_pmu.pebs_record_size;
+ max = x86_pmu.pebs_buffer_size / x86_pmu.pebs_record_size;
ds->pebs_buffer_base = (u64)(unsigned long)buffer;
ds->pebs_index = ds->pebs_buffer_base;
@@ -722,6 +731,30 @@ struct event_constraint intel_hsw_pebs_event_constraints[] = {
EVENT_CONSTRAINT_END
};
+struct event_constraint intel_bdw_pebs_event_constraints[] = {
+ INTEL_FLAGS_UEVENT_CONSTRAINT(0x01c0, 0x2), /* INST_RETIRED.PRECDIST */
+ INTEL_PLD_CONSTRAINT(0x01cd, 0xf), /* MEM_TRANS_RETIRED.* */
+ /* UOPS_RETIRED.ALL, inv=1, cmask=16 (cycles:p). */
+ INTEL_FLAGS_EVENT_CONSTRAINT(0x108001c2, 0xf),
+ /* INST_RETIRED.PREC_DIST, inv=1, cmask=16 (cycles:ppp). */
+ INTEL_FLAGS_EVENT_CONSTRAINT(0x108001c0, 0x2),
+ INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_NA(0x01c2, 0xf), /* UOPS_RETIRED.ALL */
+ INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_LD(0x11d0, 0xf), /* MEM_UOPS_RETIRED.STLB_MISS_LOADS */
+ INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_LD(0x21d0, 0xf), /* MEM_UOPS_RETIRED.LOCK_LOADS */
+ INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_LD(0x41d0, 0xf), /* MEM_UOPS_RETIRED.SPLIT_LOADS */
+ INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_LD(0x81d0, 0xf), /* MEM_UOPS_RETIRED.ALL_LOADS */
+ INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_ST(0x12d0, 0xf), /* MEM_UOPS_RETIRED.STLB_MISS_STORES */
+ INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_ST(0x42d0, 0xf), /* MEM_UOPS_RETIRED.SPLIT_STORES */
+ INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_ST(0x82d0, 0xf), /* MEM_UOPS_RETIRED.ALL_STORES */
+ INTEL_FLAGS_EVENT_CONSTRAINT_DATALA_LD(0xd1, 0xf), /* MEM_LOAD_UOPS_RETIRED.* */
+ INTEL_FLAGS_EVENT_CONSTRAINT_DATALA_LD(0xd2, 0xf), /* MEM_LOAD_UOPS_L3_HIT_RETIRED.* */
+ INTEL_FLAGS_EVENT_CONSTRAINT_DATALA_LD(0xd3, 0xf), /* MEM_LOAD_UOPS_L3_MISS_RETIRED.* */
+ /* Allow all events as PEBS with no flags */
+ INTEL_ALL_EVENT_CONSTRAINT(0, 0xf),
+ EVENT_CONSTRAINT_END
+};
+
+
struct event_constraint intel_skl_pebs_event_constraints[] = {
INTEL_FLAGS_UEVENT_CONSTRAINT(0x1c0, 0x2), /* INST_RETIRED.PREC_DIST */
/* INST_RETIRED.PREC_DIST, inv=1, cmask=16 (cycles:ppp). */
@@ -1319,19 +1352,28 @@ void __init intel_ds_init(void)
x86_pmu.bts = boot_cpu_has(X86_FEATURE_BTS);
x86_pmu.pebs = boot_cpu_has(X86_FEATURE_PEBS);
+ x86_pmu.pebs_buffer_size = PEBS_BUFFER_SIZE;
if (x86_pmu.pebs) {
char pebs_type = x86_pmu.intel_cap.pebs_trap ? '+' : '-';
int format = x86_pmu.intel_cap.pebs_format;
switch (format) {
case 0:
- printk(KERN_CONT "PEBS fmt0%c, ", pebs_type);
+ pr_cont("PEBS fmt0%c, ", pebs_type);
x86_pmu.pebs_record_size = sizeof(struct pebs_record_core);
+ /*
+ * Using >PAGE_SIZE buffers makes the WRMSR to
+ * PERF_GLOBAL_CTRL in intel_pmu_enable_all()
+ * mysteriously hang on Core2.
+ *
+ * As a workaround, we don't do this.
+ */
+ x86_pmu.pebs_buffer_size = PAGE_SIZE;
x86_pmu.drain_pebs = intel_pmu_drain_pebs_core;
break;
case 1:
- printk(KERN_CONT "PEBS fmt1%c, ", pebs_type);
+ pr_cont("PEBS fmt1%c, ", pebs_type);
x86_pmu.pebs_record_size = sizeof(struct pebs_record_nhm);
x86_pmu.drain_pebs = intel_pmu_drain_pebs_nhm;
break;
@@ -1351,7 +1393,7 @@ void __init intel_ds_init(void)
break;
default:
- printk(KERN_CONT "no PEBS fmt%d%c, ", format, pebs_type);
+ pr_cont("no PEBS fmt%d%c, ", format, pebs_type);
x86_pmu.pebs = 0;
}
}
diff --git a/arch/x86/kernel/cpu/perf_event_knc.c b/arch/x86/events/intel/knc.c
similarity index 98%
rename from arch/x86/kernel/cpu/perf_event_knc.c
rename to arch/x86/events/intel/knc.c
index 5b0c232d1ee62..548d5f774b07e 100644
--- a/arch/x86/kernel/cpu/perf_event_knc.c
+++ b/arch/x86/events/intel/knc.c
@@ -5,7 +5,7 @@
#include
-#include "perf_event.h"
+#include "../perf_event.h"
static const u64 knc_perfmon_event_map[] =
{
@@ -263,7 +263,9 @@ static int knc_pmu_handle_irq(struct pt_regs *regs)
goto again;
done:
- knc_pmu_enable_all(0);
+ /* Only restore PMU state when it's active. See x86_pmu_disable(). */
+ if (cpuc->enabled)
+ knc_pmu_enable_all(0);
return handled;
}
diff --git a/arch/x86/kernel/cpu/perf_event_intel_lbr.c b/arch/x86/events/intel/lbr.c
similarity index 99%
rename from arch/x86/kernel/cpu/perf_event_intel_lbr.c
rename to arch/x86/events/intel/lbr.c
index 653f88d259878..69dd11887dd1e 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_lbr.c
+++ b/arch/x86/events/intel/lbr.c
@@ -5,7 +5,7 @@
#include
#include
-#include "perf_event.h"
+#include "../perf_event.h"
enum {
LBR_FORMAT_32 = 0x00,
diff --git a/arch/x86/kernel/cpu/perf_event_p4.c b/arch/x86/events/intel/p4.c
similarity index 99%
rename from arch/x86/kernel/cpu/perf_event_p4.c
rename to arch/x86/events/intel/p4.c
index f2e56783af3d8..0a5ede187d9c1 100644
--- a/arch/x86/kernel/cpu/perf_event_p4.c
+++ b/arch/x86/events/intel/p4.c
@@ -13,7 +13,7 @@
#include
#include
-#include "perf_event.h"
+#include "../perf_event.h"
#define P4_CNTR_LIMIT 3
/*
diff --git a/arch/x86/kernel/cpu/perf_event_p6.c b/arch/x86/events/intel/p6.c
similarity index 99%
rename from arch/x86/kernel/cpu/perf_event_p6.c
rename to arch/x86/events/intel/p6.c
index 7c1a0c07b607f..1f5c47ab4c655 100644
--- a/arch/x86/kernel/cpu/perf_event_p6.c
+++ b/arch/x86/events/intel/p6.c
@@ -1,7 +1,7 @@
#include
#include
-#include "perf_event.h"
+#include "../perf_event.h"
/*
* Not sure about some of these
diff --git a/arch/x86/kernel/cpu/perf_event_intel_pt.c b/arch/x86/events/intel/pt.c
similarity index 99%
rename from arch/x86/kernel/cpu/perf_event_intel_pt.c
rename to arch/x86/events/intel/pt.c
index c0bbd1033b7cb..6af7cf71d6b2e 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_pt.c
+++ b/arch/x86/events/intel/pt.c
@@ -29,8 +29,8 @@
#include
#include
-#include "perf_event.h"
-#include "intel_pt.h"
+#include "../perf_event.h"
+#include "pt.h"
static DEFINE_PER_CPU(struct pt, pt_ctx);
diff --git a/arch/x86/kernel/cpu/intel_pt.h b/arch/x86/events/intel/pt.h
similarity index 100%
rename from arch/x86/kernel/cpu/intel_pt.h
rename to arch/x86/events/intel/pt.h
diff --git a/arch/x86/kernel/cpu/perf_event_intel_rapl.c b/arch/x86/events/intel/rapl.c
similarity index 74%
rename from arch/x86/kernel/cpu/perf_event_intel_rapl.c
rename to arch/x86/events/intel/rapl.c
index 24a351ad628d2..b834a3f55a015 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_rapl.c
+++ b/arch/x86/events/intel/rapl.c
@@ -44,11 +44,14 @@
* the duration of the measurement. Tools may use a function such as
* ldexp(raw_count, -32);
*/
+
+#define pr_fmt(fmt) "RAPL PMU: " fmt
+
#include
#include
#include
#include
-#include "perf_event.h"
+#include "../perf_event.h"
/*
* RAPL energy status counters
@@ -107,7 +110,7 @@ static ssize_t __rapl_##_var##_show(struct kobject *kobj, \
static struct kobj_attribute format_attr_##_var = \
__ATTR(_name, 0444, __rapl_##_var##_show, NULL)
-#define RAPL_CNTR_WIDTH 32 /* 32-bit rapl counters */
+#define RAPL_CNTR_WIDTH 32
#define RAPL_EVENT_ATTR_STR(_name, v, str) \
static struct perf_pmu_events_attr event_attr_##v = { \
@@ -117,23 +120,33 @@ static struct perf_pmu_events_attr event_attr_##v = { \
};
struct rapl_pmu {
- spinlock_t lock;
- int n_active; /* number of active events */
- struct list_head active_list;
- struct pmu *pmu; /* pointer to rapl_pmu_class */
- ktime_t timer_interval; /* in ktime_t unit */
- struct hrtimer hrtimer;
+ raw_spinlock_t lock;
+ int n_active;
+ int cpu;
+ struct list_head active_list;
+ struct pmu *pmu;
+ ktime_t timer_interval;
+ struct hrtimer hrtimer;
};
-static int rapl_hw_unit[NR_RAPL_DOMAINS] __read_mostly; /* 1/2^hw_unit Joule */
-static struct pmu rapl_pmu_class;
+struct rapl_pmus {
+ struct pmu pmu;
+ unsigned int maxpkg;
+ struct rapl_pmu *pmus[];
+};
+
+ /* 1/2^hw_unit Joule */
+static int rapl_hw_unit[NR_RAPL_DOMAINS] __read_mostly;
+static struct rapl_pmus *rapl_pmus;
static cpumask_t rapl_cpu_mask;
-static int rapl_cntr_mask;
+static unsigned int rapl_cntr_mask;
+static u64 rapl_timer_ms;
-static DEFINE_PER_CPU(struct rapl_pmu *, rapl_pmu);
-static DEFINE_PER_CPU(struct rapl_pmu *, rapl_pmu_to_free);
+static inline struct rapl_pmu *cpu_to_rapl_pmu(unsigned int cpu)
+{
+ return rapl_pmus->pmus[topology_logical_package_id(cpu)];
+}
-static struct x86_pmu_quirk *rapl_quirks;
static inline u64 rapl_read_counter(struct perf_event *event)
{
u64 raw;
@@ -141,19 +154,10 @@ static inline u64 rapl_read_counter(struct perf_event *event)
return raw;
}
-#define rapl_add_quirk(func_) \
-do { \
- static struct x86_pmu_quirk __quirk __initdata = { \
- .func = func_, \
- }; \
- __quirk.next = rapl_quirks; \
- rapl_quirks = &__quirk; \
-} while (0)
-
static inline u64 rapl_scale(u64 v, int cfg)
{
if (cfg > NR_RAPL_DOMAINS) {
- pr_warn("invalid domain %d, failed to scale data\n", cfg);
+ pr_warn("Invalid domain %d, failed to scale data\n", cfg);
return v;
}
/*
@@ -206,27 +210,21 @@ static void rapl_start_hrtimer(struct rapl_pmu *pmu)
HRTIMER_MODE_REL_PINNED);
}
-static void rapl_stop_hrtimer(struct rapl_pmu *pmu)
-{
- hrtimer_cancel(&pmu->hrtimer);
-}
-
static enum hrtimer_restart rapl_hrtimer_handle(struct hrtimer *hrtimer)
{
- struct rapl_pmu *pmu = __this_cpu_read(rapl_pmu);
+ struct rapl_pmu *pmu = container_of(hrtimer, struct rapl_pmu, hrtimer);
struct perf_event *event;
unsigned long flags;
if (!pmu->n_active)
return HRTIMER_NORESTART;
- spin_lock_irqsave(&pmu->lock, flags);
+ raw_spin_lock_irqsave(&pmu->lock, flags);
- list_for_each_entry(event, &pmu->active_list, active_entry) {
+ list_for_each_entry(event, &pmu->active_list, active_entry)
rapl_event_update(event);
- }
- spin_unlock_irqrestore(&pmu->lock, flags);
+ raw_spin_unlock_irqrestore(&pmu->lock, flags);
hrtimer_forward_now(hrtimer, pmu->timer_interval);
@@ -260,28 +258,28 @@ static void __rapl_pmu_event_start(struct rapl_pmu *pmu,
static void rapl_pmu_event_start(struct perf_event *event, int mode)
{
- struct rapl_pmu *pmu = __this_cpu_read(rapl_pmu);
+ struct rapl_pmu *pmu = event->pmu_private;
unsigned long flags;
- spin_lock_irqsave(&pmu->lock, flags);
+ raw_spin_lock_irqsave(&pmu->lock, flags);
__rapl_pmu_event_start(pmu, event);
- spin_unlock_irqrestore(&pmu->lock, flags);
+ raw_spin_unlock_irqrestore(&pmu->lock, flags);
}
static void rapl_pmu_event_stop(struct perf_event *event, int mode)
{
- struct rapl_pmu *pmu = __this_cpu_read(rapl_pmu);
+ struct rapl_pmu *pmu = event->pmu_private;
struct hw_perf_event *hwc = &event->hw;
unsigned long flags;
- spin_lock_irqsave(&pmu->lock, flags);
+ raw_spin_lock_irqsave(&pmu->lock, flags);
/* mark event as deactivated and stopped */
if (!(hwc->state & PERF_HES_STOPPED)) {
WARN_ON_ONCE(pmu->n_active <= 0);
pmu->n_active--;
if (pmu->n_active == 0)
- rapl_stop_hrtimer(pmu);
+ hrtimer_cancel(&pmu->hrtimer);
list_del(&event->active_entry);
@@ -299,23 +297,23 @@ static void rapl_pmu_event_stop(struct perf_event *event, int mode)
hwc->state |= PERF_HES_UPTODATE;
}
- spin_unlock_irqrestore(&pmu->lock, flags);
+ raw_spin_unlock_irqrestore(&pmu->lock, flags);
}
static int rapl_pmu_event_add(struct perf_event *event, int mode)
{
- struct rapl_pmu *pmu = __this_cpu_read(rapl_pmu);
+ struct rapl_pmu *pmu = event->pmu_private;
struct hw_perf_event *hwc = &event->hw;
unsigned long flags;
- spin_lock_irqsave(&pmu->lock, flags);
+ raw_spin_lock_irqsave(&pmu->lock, flags);
hwc->state = PERF_HES_UPTODATE | PERF_HES_STOPPED;
if (mode & PERF_EF_START)
__rapl_pmu_event_start(pmu, event);
- spin_unlock_irqrestore(&pmu->lock, flags);
+ raw_spin_unlock_irqrestore(&pmu->lock, flags);
return 0;
}
@@ -329,15 +327,19 @@ static int rapl_pmu_event_init(struct perf_event *event)
{
u64 cfg = event->attr.config & RAPL_EVENT_MASK;
int bit, msr, ret = 0;
+ struct rapl_pmu *pmu;
/* only look at RAPL events */
- if (event->attr.type != rapl_pmu_class.type)
+ if (event->attr.type != rapl_pmus->pmu.type)
return -ENOENT;
/* check only supported bits are set */
if (event->attr.config & ~RAPL_EVENT_MASK)
return -EINVAL;
+ if (event->cpu < 0)
+ return -EINVAL;
+
/*
* check event is known (determines counter)
*/
@@ -376,6 +378,9 @@ static int rapl_pmu_event_init(struct perf_event *event)
return -EINVAL;
/* must be done before validate_group */
+ pmu = cpu_to_rapl_pmu(event->cpu);
+ event->cpu = pmu->cpu;
+ event->pmu_private = pmu;
event->hw.event_base = msr;
event->hw.config = cfg;
event->hw.idx = bit;
@@ -506,139 +511,62 @@ const struct attribute_group *rapl_attr_groups[] = {
NULL,
};
-static struct pmu rapl_pmu_class = {
- .attr_groups = rapl_attr_groups,
- .task_ctx_nr = perf_invalid_context, /* system-wide only */
- .event_init = rapl_pmu_event_init,
- .add = rapl_pmu_event_add, /* must have */
- .del = rapl_pmu_event_del, /* must have */
- .start = rapl_pmu_event_start,
- .stop = rapl_pmu_event_stop,
- .read = rapl_pmu_event_read,
-};
-
static void rapl_cpu_exit(int cpu)
{
- struct rapl_pmu *pmu = per_cpu(rapl_pmu, cpu);
- int i, phys_id = topology_physical_package_id(cpu);
- int target = -1;
+ struct rapl_pmu *pmu = cpu_to_rapl_pmu(cpu);
+ int target;
- /* find a new cpu on same package */
- for_each_online_cpu(i) {
- if (i == cpu)
- continue;
- if (phys_id == topology_physical_package_id(i)) {
- target = i;
- break;
- }
- }
- /*
- * clear cpu from cpumask
- * if was set in cpumask and still some cpu on package,
- * then move to new cpu
- */
- if (cpumask_test_and_clear_cpu(cpu, &rapl_cpu_mask) && target >= 0)
- cpumask_set_cpu(target, &rapl_cpu_mask);
+ /* Check if exiting cpu is used for collecting rapl events */
+ if (!cpumask_test_and_clear_cpu(cpu, &rapl_cpu_mask))
+ return;
- WARN_ON(cpumask_empty(&rapl_cpu_mask));
- /*
- * migrate events and context to new cpu
- */
- if (target >= 0)
- perf_pmu_migrate_context(pmu->pmu, cpu, target);
+ pmu->cpu = -1;
+ /* Find a new cpu to collect rapl events */
+ target = cpumask_any_but(topology_core_cpumask(cpu), cpu);
- /* cancel overflow polling timer for CPU */
- rapl_stop_hrtimer(pmu);
+ /* Migrate rapl events to the new target */
+ if (target < nr_cpu_ids) {
+ cpumask_set_cpu(target, &rapl_cpu_mask);
+ pmu->cpu = target;
+ perf_pmu_migrate_context(pmu->pmu, cpu, target);
+ }
}
static void rapl_cpu_init(int cpu)
{
- int i, phys_id = topology_physical_package_id(cpu);
+ struct rapl_pmu *pmu = cpu_to_rapl_pmu(cpu);
+ int target;
- /* check if phys_is is already covered */
- for_each_cpu(i, &rapl_cpu_mask) {
- if (phys_id == topology_physical_package_id(i))
- return;
- }
- /* was not found, so add it */
- cpumask_set_cpu(cpu, &rapl_cpu_mask);
-}
-
-static __init void rapl_hsw_server_quirk(void)
-{
/*
- * DRAM domain on HSW server has fixed energy unit which can be
- * different than the unit from power unit MSR.
- * "Intel Xeon Processor E5-1600 and E5-2600 v3 Product Families, V2
- * of 2. Datasheet, September 2014, Reference Number: 330784-001 "
+ * Check if there is an online cpu in the package which collects rapl
+ * events already.
*/
- rapl_hw_unit[RAPL_IDX_RAM_NRG_STAT] = 16;
+ target = cpumask_any_and(&rapl_cpu_mask, topology_core_cpumask(cpu));
+ if (target < nr_cpu_ids)
+ return;
+
+ cpumask_set_cpu(cpu, &rapl_cpu_mask);
+ pmu->cpu = cpu;
}
static int rapl_cpu_prepare(int cpu)
{
- struct rapl_pmu *pmu = per_cpu(rapl_pmu, cpu);
- int phys_id = topology_physical_package_id(cpu);
- u64 ms;
+ struct rapl_pmu *pmu = cpu_to_rapl_pmu(cpu);
if (pmu)
return 0;
- if (phys_id < 0)
- return -1;
-
pmu = kzalloc_node(sizeof(*pmu), GFP_KERNEL, cpu_to_node(cpu));
if (!pmu)
- return -1;
- spin_lock_init(&pmu->lock);
+ return -ENOMEM;
+ raw_spin_lock_init(&pmu->lock);
INIT_LIST_HEAD(&pmu->active_list);
-
- pmu->pmu = &rapl_pmu_class;
-
- /*
- * use reference of 200W for scaling the timeout
- * to avoid missing counter overflows.
- * 200W = 200 Joules/sec
- * divide interval by 2 to avoid lockstep (2 * 100)
- * if hw unit is 32, then we use 2 ms 1/200/2
- */
- if (rapl_hw_unit[0] < 32)
- ms = (1000 / (2 * 100)) * (1ULL << (32 - rapl_hw_unit[0] - 1));
- else
- ms = 2;
-
- pmu->timer_interval = ms_to_ktime(ms);
-
+ pmu->pmu = &rapl_pmus->pmu;
+ pmu->timer_interval = ms_to_ktime(rapl_timer_ms);
+ pmu->cpu = -1;
rapl_hrtimer_init(pmu);
-
- /* set RAPL pmu for this cpu for now */
- per_cpu(rapl_pmu, cpu) = pmu;
- per_cpu(rapl_pmu_to_free, cpu) = NULL;
-
- return 0;
-}
-
-static void rapl_cpu_kfree(int cpu)
-{
- struct rapl_pmu *pmu = per_cpu(rapl_pmu_to_free, cpu);
-
- kfree(pmu);
-
- per_cpu(rapl_pmu_to_free, cpu) = NULL;
-}
-
-static int rapl_cpu_dying(int cpu)
-{
- struct rapl_pmu *pmu = per_cpu(rapl_pmu, cpu);
-
- if (!pmu)
- return 0;
-
- per_cpu(rapl_pmu, cpu) = NULL;
-
- per_cpu(rapl_pmu_to_free, cpu) = pmu;
-
+ rapl_pmus->pmus[topology_logical_package_id(cpu)] = pmu;
return 0;
}
@@ -651,28 +579,20 @@ static int rapl_cpu_notifier(struct notifier_block *self,
case CPU_UP_PREPARE:
rapl_cpu_prepare(cpu);
break;
- case CPU_STARTING:
- rapl_cpu_init(cpu);
- break;
- case CPU_UP_CANCELED:
- case CPU_DYING:
- rapl_cpu_dying(cpu);
- break;
+
+ case CPU_DOWN_FAILED:
case CPU_ONLINE:
- case CPU_DEAD:
- rapl_cpu_kfree(cpu);
+ rapl_cpu_init(cpu);
break;
+
case CPU_DOWN_PREPARE:
rapl_cpu_exit(cpu);
break;
- default:
- break;
}
-
return NOTIFY_OK;
}
-static int rapl_check_hw_unit(void)
+static int rapl_check_hw_unit(bool apply_quirk)
{
u64 msr_rapl_power_unit_bits;
int i;
@@ -683,28 +603,107 @@ static int rapl_check_hw_unit(void)
for (i = 0; i < NR_RAPL_DOMAINS; i++)
rapl_hw_unit[i] = (msr_rapl_power_unit_bits >> 8) & 0x1FULL;
+ /*
+ * DRAM domain on HSW server and KNL has fixed energy unit which can be
+ * different than the unit from power unit MSR. See
+ * "Intel Xeon Processor E5-1600 and E5-2600 v3 Product Families, V2
+ * of 2. Datasheet, September 2014, Reference Number: 330784-001 "
+ */
+ if (apply_quirk)
+ rapl_hw_unit[RAPL_IDX_RAM_NRG_STAT] = 16;
+
+ /*
+ * Calculate the timer rate:
+ * Use reference of 200W for scaling the timeout to avoid counter
+ * overflows. 200W = 200 Joules/sec
+ * Divide interval by 2 to avoid lockstep (2 * 100)
+ * if hw unit is 32, then we use 2 ms 1/200/2
+ */
+ rapl_timer_ms = 2;
+ if (rapl_hw_unit[0] < 32) {
+ rapl_timer_ms = (1000 / (2 * 100));
+ rapl_timer_ms *= (1ULL << (32 - rapl_hw_unit[0] - 1));
+ }
+ return 0;
+}
+
+static void __init rapl_advertise(void)
+{
+ int i;
+
+ pr_info("API unit is 2^-32 Joules, %d fixed counters, %llu ms ovfl timer\n",
+ hweight32(rapl_cntr_mask), rapl_timer_ms);
+
+ for (i = 0; i < NR_RAPL_DOMAINS; i++) {
+ if (rapl_cntr_mask & (1 << i)) {
+ pr_info("hw unit of domain %s 2^-%d Joules\n",
+ rapl_domain_names[i], rapl_hw_unit[i]);
+ }
+ }
+}
+
+static int __init rapl_prepare_cpus(void)
+{
+ unsigned int cpu, pkg;
+ int ret;
+
+ for_each_online_cpu(cpu) {
+ pkg = topology_logical_package_id(cpu);
+ if (rapl_pmus->pmus[pkg])
+ continue;
+
+ ret = rapl_cpu_prepare(cpu);
+ if (ret)
+ return ret;
+ rapl_cpu_init(cpu);
+ }
+ return 0;
+}
+
+static void __init cleanup_rapl_pmus(void)
+{
+ int i;
+
+ for (i = 0; i < rapl_pmus->maxpkg; i++)
+ kfree(rapl_pmus->pmus + i);
+ kfree(rapl_pmus);
+}
+
+static int __init init_rapl_pmus(void)
+{
+ int maxpkg = topology_max_packages();
+ size_t size;
+
+ size = sizeof(*rapl_pmus) + maxpkg * sizeof(struct rapl_pmu *);
+ rapl_pmus = kzalloc(size, GFP_KERNEL);
+ if (!rapl_pmus)
+ return -ENOMEM;
+
+ rapl_pmus->maxpkg = maxpkg;
+ rapl_pmus->pmu.attr_groups = rapl_attr_groups;
+ rapl_pmus->pmu.task_ctx_nr = perf_invalid_context;
+ rapl_pmus->pmu.event_init = rapl_pmu_event_init;
+ rapl_pmus->pmu.add = rapl_pmu_event_add;
+ rapl_pmus->pmu.del = rapl_pmu_event_del;
+ rapl_pmus->pmu.start = rapl_pmu_event_start;
+ rapl_pmus->pmu.stop = rapl_pmu_event_stop;
+ rapl_pmus->pmu.read = rapl_pmu_event_read;
return 0;
}
-static const struct x86_cpu_id rapl_cpu_match[] = {
+static const struct x86_cpu_id rapl_cpu_match[] __initconst = {
[0] = { .vendor = X86_VENDOR_INTEL, .family = 6 },
[1] = {},
};
static int __init rapl_pmu_init(void)
{
- struct rapl_pmu *pmu;
- int cpu, ret;
- struct x86_pmu_quirk *quirk;
- int i;
+ bool apply_quirk = false;
+ int ret;
- /*
- * check for Intel processor family 6
- */
if (!x86_match_cpu(rapl_cpu_match))
- return 0;
+ return -ENODEV;
- /* check supported CPU */
switch (boot_cpu_data.x86_model) {
case 42: /* Sandy Bridge */
case 58: /* Ivy Bridge */
@@ -712,7 +711,7 @@ static int __init rapl_pmu_init(void)
rapl_pmu_events_group.attrs = rapl_events_cln_attr;
break;
case 63: /* Haswell-Server */
- rapl_add_quirk(rapl_hsw_server_quirk);
+ apply_quirk = true;
rapl_cntr_mask = RAPL_IDX_SRV;
rapl_pmu_events_group.attrs = rapl_events_srv_attr;
break;
@@ -728,56 +727,41 @@ static int __init rapl_pmu_init(void)
rapl_pmu_events_group.attrs = rapl_events_srv_attr;
break;
case 87: /* Knights Landing */
- rapl_add_quirk(rapl_hsw_server_quirk);
+ apply_quirk = true;
rapl_cntr_mask = RAPL_IDX_KNL;
rapl_pmu_events_group.attrs = rapl_events_knl_attr;
-
+ break;
default:
- /* unsupported */
- return 0;
+ return -ENODEV;
}
- ret = rapl_check_hw_unit();
+
+ ret = rapl_check_hw_unit(apply_quirk);
if (ret)
return ret;
- /* run cpu model quirks */
- for (quirk = rapl_quirks; quirk; quirk = quirk->next)
- quirk->func();
- cpu_notifier_register_begin();
+ ret = init_rapl_pmus();
+ if (ret)
+ return ret;
- for_each_online_cpu(cpu) {
- ret = rapl_cpu_prepare(cpu);
- if (ret)
- goto out;
- rapl_cpu_init(cpu);
- }
+ cpu_notifier_register_begin();
- __perf_cpu_notifier(rapl_cpu_notifier);
+ ret = rapl_prepare_cpus();
+ if (ret)
+ goto out;
- ret = perf_pmu_register(&rapl_pmu_class, "power", -1);
- if (WARN_ON(ret)) {
- pr_info("RAPL PMU detected, registration failed (%d), RAPL PMU disabled\n", ret);
- cpu_notifier_register_done();
- return -1;
- }
+ ret = perf_pmu_register(&rapl_pmus->pmu, "power", -1);
+ if (ret)
+ goto out;
- pmu = __this_cpu_read(rapl_pmu);
+ __perf_cpu_notifier(rapl_cpu_notifier);
+ cpu_notifier_register_done();
+ rapl_advertise();
+ return 0;
- pr_info("RAPL PMU detected,"
- " API unit is 2^-32 Joules,"
- " %d fixed counters"
- " %llu ms ovfl timer\n",
- hweight32(rapl_cntr_mask),
- ktime_to_ms(pmu->timer_interval));
- for (i = 0; i < NR_RAPL_DOMAINS; i++) {
- if (rapl_cntr_mask & (1 << i)) {
- pr_info("hw unit of domain %s 2^-%d Joules\n",
- rapl_domain_names[i], rapl_hw_unit[i]);
- }
- }
out:
+ pr_warn("Initialization failed (%d), disabled\n", ret);
+ cleanup_rapl_pmus();
cpu_notifier_register_done();
-
- return 0;
+ return ret;
}
device_initcall(rapl_pmu_init);
diff --git a/arch/x86/kernel/cpu/perf_event_intel_uncore.c b/arch/x86/events/intel/uncore.c
similarity index 75%
rename from arch/x86/kernel/cpu/perf_event_intel_uncore.c
rename to arch/x86/events/intel/uncore.c
index 3bf41d4137750..7012d18bb2930 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_uncore.c
+++ b/arch/x86/events/intel/uncore.c
@@ -1,4 +1,4 @@
-#include "perf_event_intel_uncore.h"
+#include "uncore.h"
static struct intel_uncore_type *empty_uncore[] = { NULL, };
struct intel_uncore_type **uncore_msr_uncores = empty_uncore;
@@ -9,9 +9,9 @@ struct pci_driver *uncore_pci_driver;
/* pci bus to socket mapping */
DEFINE_RAW_SPINLOCK(pci2phy_map_lock);
struct list_head pci2phy_map_head = LIST_HEAD_INIT(pci2phy_map_head);
-struct pci_dev *uncore_extra_pci_dev[UNCORE_SOCKET_MAX][UNCORE_EXTRA_PCI_DEV_MAX];
+struct pci_extra_dev *uncore_extra_pci_dev;
+static int max_packages;
-static DEFINE_RAW_SPINLOCK(uncore_box_lock);
/* mask of cpus that collect uncore events */
static cpumask_t uncore_cpu_mask;
@@ -21,7 +21,7 @@ static struct event_constraint uncore_constraint_fixed =
struct event_constraint uncore_constraint_empty =
EVENT_CONSTRAINT(0, 0, 0);
-int uncore_pcibus_to_physid(struct pci_bus *bus)
+static int uncore_pcibus_to_physid(struct pci_bus *bus)
{
struct pci2phy_map *map;
int phys_id = -1;
@@ -38,6 +38,16 @@ int uncore_pcibus_to_physid(struct pci_bus *bus)
return phys_id;
}
+static void uncore_free_pcibus_map(void)
+{
+ struct pci2phy_map *map, *tmp;
+
+ list_for_each_entry_safe(map, tmp, &pci2phy_map_head, list) {
+ list_del(&map->list);
+ kfree(map);
+ }
+}
+
struct pci2phy_map *__find_pci2phy_map(int segment)
{
struct pci2phy_map *map, *alloc = NULL;
@@ -82,43 +92,9 @@ ssize_t uncore_event_show(struct kobject *kobj,
return sprintf(buf, "%s", event->config);
}
-struct intel_uncore_pmu *uncore_event_to_pmu(struct perf_event *event)
-{
- return container_of(event->pmu, struct intel_uncore_pmu, pmu);
-}
-
struct intel_uncore_box *uncore_pmu_to_box(struct intel_uncore_pmu *pmu, int cpu)
{
- struct intel_uncore_box *box;
-
- box = *per_cpu_ptr(pmu->box, cpu);
- if (box)
- return box;
-
- raw_spin_lock(&uncore_box_lock);
- /* Recheck in lock to handle races. */
- if (*per_cpu_ptr(pmu->box, cpu))
- goto out;
- list_for_each_entry(box, &pmu->box_list, list) {
- if (box->phys_id == topology_physical_package_id(cpu)) {
- atomic_inc(&box->refcnt);
- *per_cpu_ptr(pmu->box, cpu) = box;
- break;
- }
- }
-out:
- raw_spin_unlock(&uncore_box_lock);
-
- return *per_cpu_ptr(pmu->box, cpu);
-}
-
-struct intel_uncore_box *uncore_event_to_box(struct perf_event *event)
-{
- /*
- * perf core schedules event on the basis of cpu, uncore events are
- * collected by one of the cpus inside a physical package.
- */
- return uncore_pmu_to_box(uncore_event_to_pmu(event), smp_processor_id());
+ return pmu->boxes[topology_logical_package_id(cpu)];
}
u64 uncore_msr_read_counter(struct intel_uncore_box *box, struct perf_event *event)
@@ -207,7 +183,8 @@ u64 uncore_shared_reg_config(struct intel_uncore_box *box, int idx)
return config;
}
-static void uncore_assign_hw_event(struct intel_uncore_box *box, struct perf_event *event, int idx)
+static void uncore_assign_hw_event(struct intel_uncore_box *box,
+ struct perf_event *event, int idx)
{
struct hw_perf_event *hwc = &event->hw;
@@ -302,24 +279,25 @@ static void uncore_pmu_init_hrtimer(struct intel_uncore_box *box)
box->hrtimer.function = uncore_pmu_hrtimer;
}
-static struct intel_uncore_box *uncore_alloc_box(struct intel_uncore_type *type, int node)
+static struct intel_uncore_box *uncore_alloc_box(struct intel_uncore_type *type,
+ int node)
{
+ int i, size, numshared = type->num_shared_regs ;
struct intel_uncore_box *box;
- int i, size;
- size = sizeof(*box) + type->num_shared_regs * sizeof(struct intel_uncore_extra_reg);
+ size = sizeof(*box) + numshared * sizeof(struct intel_uncore_extra_reg);
box = kzalloc_node(size, GFP_KERNEL, node);
if (!box)
return NULL;
- for (i = 0; i < type->num_shared_regs; i++)
+ for (i = 0; i < numshared; i++)
raw_spin_lock_init(&box->shared_regs[i].lock);
uncore_pmu_init_hrtimer(box);
- atomic_set(&box->refcnt, 1);
box->cpu = -1;
- box->phys_id = -1;
+ box->pci_phys_id = -1;
+ box->pkgid = -1;
/* set default hrtimer timeout */
box->hrtimer_duration = UNCORE_PMU_HRTIMER_INTERVAL;
@@ -341,7 +319,8 @@ static bool is_uncore_event(struct perf_event *event)
}
static int
-uncore_collect_events(struct intel_uncore_box *box, struct perf_event *leader, bool dogrp)
+uncore_collect_events(struct intel_uncore_box *box, struct perf_event *leader,
+ bool dogrp)
{
struct perf_event *event;
int n, max_count;
@@ -402,7 +381,8 @@ uncore_get_event_constraint(struct intel_uncore_box *box, struct perf_event *eve
return &type->unconstrainted;
}
-static void uncore_put_event_constraint(struct intel_uncore_box *box, struct perf_event *event)
+static void uncore_put_event_constraint(struct intel_uncore_box *box,
+ struct perf_event *event)
{
if (box->pmu->type->ops->put_constraint)
box->pmu->type->ops->put_constraint(box, event);
@@ -582,7 +562,7 @@ static void uncore_pmu_event_del(struct perf_event *event, int flags)
if (event == box->event_list[i]) {
uncore_put_event_constraint(box, event);
- while (++i < box->n_events)
+ for (++i; i < box->n_events; i++)
box->event_list[i - 1] = box->event_list[i];
--box->n_events;
@@ -676,6 +656,7 @@ static int uncore_pmu_event_init(struct perf_event *event)
if (!box || box->cpu < 0)
return -EINVAL;
event->cpu = box->cpu;
+ event->pmu_private = box;
event->hw.idx = -1;
event->hw.last_tag = ~0ULL;
@@ -760,64 +741,110 @@ static int uncore_pmu_register(struct intel_uncore_pmu *pmu)
}
ret = perf_pmu_register(&pmu->pmu, pmu->name, -1);
+ if (!ret)
+ pmu->registered = true;
return ret;
}
+static void uncore_pmu_unregister(struct intel_uncore_pmu *pmu)
+{
+ if (!pmu->registered)
+ return;
+ perf_pmu_unregister(&pmu->pmu);
+ pmu->registered = false;
+}
+
+static void __init __uncore_exit_boxes(struct intel_uncore_type *type, int cpu)
+{
+ struct intel_uncore_pmu *pmu = type->pmus;
+ struct intel_uncore_box *box;
+ int i, pkg;
+
+ if (pmu) {
+ pkg = topology_physical_package_id(cpu);
+ for (i = 0; i < type->num_boxes; i++, pmu++) {
+ box = pmu->boxes[pkg];
+ if (box)
+ uncore_box_exit(box);
+ }
+ }
+}
+
+static void __init uncore_exit_boxes(void *dummy)
+{
+ struct intel_uncore_type **types;
+
+ for (types = uncore_msr_uncores; *types; types++)
+ __uncore_exit_boxes(*types++, smp_processor_id());
+}
+
+static void uncore_free_boxes(struct intel_uncore_pmu *pmu)
+{
+ int pkg;
+
+ for (pkg = 0; pkg < max_packages; pkg++)
+ kfree(pmu->boxes[pkg]);
+ kfree(pmu->boxes);
+}
+
static void __init uncore_type_exit(struct intel_uncore_type *type)
{
+ struct intel_uncore_pmu *pmu = type->pmus;
int i;
- for (i = 0; i < type->num_boxes; i++)
- free_percpu(type->pmus[i].box);
- kfree(type->pmus);
- type->pmus = NULL;
+ if (pmu) {
+ for (i = 0; i < type->num_boxes; i++, pmu++) {
+ uncore_pmu_unregister(pmu);
+ uncore_free_boxes(pmu);
+ }
+ kfree(type->pmus);
+ type->pmus = NULL;
+ }
kfree(type->events_group);
type->events_group = NULL;
}
static void __init uncore_types_exit(struct intel_uncore_type **types)
{
- int i;
- for (i = 0; types[i]; i++)
- uncore_type_exit(types[i]);
+ for (; *types; types++)
+ uncore_type_exit(*types);
}
-static int __init uncore_type_init(struct intel_uncore_type *type)
+static int __init uncore_type_init(struct intel_uncore_type *type, bool setid)
{
struct intel_uncore_pmu *pmus;
struct attribute_group *attr_group;
struct attribute **attrs;
+ size_t size;
int i, j;
pmus = kzalloc(sizeof(*pmus) * type->num_boxes, GFP_KERNEL);
if (!pmus)
return -ENOMEM;
- type->pmus = pmus;
+ size = max_packages * sizeof(struct intel_uncore_box *);
+ for (i = 0; i < type->num_boxes; i++) {
+ pmus[i].func_id = setid ? i : -1;
+ pmus[i].pmu_idx = i;
+ pmus[i].type = type;
+ pmus[i].boxes = kzalloc(size, GFP_KERNEL);
+ if (!pmus[i].boxes)
+ return -ENOMEM;
+ }
+
+ type->pmus = pmus;
type->unconstrainted = (struct event_constraint)
__EVENT_CONSTRAINT(0, (1ULL << type->num_counters) - 1,
0, type->num_counters, 0, 0);
- for (i = 0; i < type->num_boxes; i++) {
- pmus[i].func_id = -1;
- pmus[i].pmu_idx = i;
- pmus[i].type = type;
- INIT_LIST_HEAD(&pmus[i].box_list);
- pmus[i].box = alloc_percpu(struct intel_uncore_box *);
- if (!pmus[i].box)
- goto fail;
- }
-
if (type->event_descs) {
- i = 0;
- while (type->event_descs[i].attr.attr.name)
- i++;
+ for (i = 0; type->event_descs[i].attr.attr.name; i++);
attr_group = kzalloc(sizeof(struct attribute *) * (i + 1) +
sizeof(*attr_group), GFP_KERNEL);
if (!attr_group)
- goto fail;
+ return -ENOMEM;
attrs = (struct attribute **)(attr_group + 1);
attr_group->name = "events";
@@ -831,25 +858,19 @@ static int __init uncore_type_init(struct intel_uncore_type *type)
type->pmu_group = &uncore_pmu_attr_group;
return 0;
-fail:
- uncore_type_exit(type);
- return -ENOMEM;
}
-static int __init uncore_types_init(struct intel_uncore_type **types)
+static int __init
+uncore_types_init(struct intel_uncore_type **types, bool setid)
{
- int i, ret;
+ int ret;
- for (i = 0; types[i]; i++) {
- ret = uncore_type_init(types[i]);
+ for (; *types; types++) {
+ ret = uncore_type_init(*types, setid);
if (ret)
- goto fail;
+ return ret;
}
return 0;
-fail:
- while (--i >= 0)
- uncore_type_exit(types[i]);
- return ret;
}
/*
@@ -857,28 +878,28 @@ static int __init uncore_types_init(struct intel_uncore_type **types)
*/
static int uncore_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
{
+ struct intel_uncore_type *type;
struct intel_uncore_pmu *pmu;
struct intel_uncore_box *box;
- struct intel_uncore_type *type;
- int phys_id;
- bool first_box = false;
+ int phys_id, pkg, ret;
phys_id = uncore_pcibus_to_physid(pdev->bus);
if (phys_id < 0)
return -ENODEV;
+ pkg = topology_phys_to_logical_pkg(phys_id);
+ if (WARN_ON_ONCE(pkg < 0))
+ return -EINVAL;
+
if (UNCORE_PCI_DEV_TYPE(id->driver_data) == UNCORE_EXTRA_PCI_DEV) {
int idx = UNCORE_PCI_DEV_IDX(id->driver_data);
- uncore_extra_pci_dev[phys_id][idx] = pdev;
+
+ uncore_extra_pci_dev[pkg].dev[idx] = pdev;
pci_set_drvdata(pdev, NULL);
return 0;
}
type = uncore_pci_uncores[UNCORE_PCI_DEV_TYPE(id->driver_data)];
- box = uncore_alloc_box(type, NUMA_NO_NODE);
- if (!box)
- return -ENOMEM;
-
/*
* for performance monitoring unit with multiple boxes,
* each box has a different function id.
@@ -890,44 +911,60 @@ static int uncore_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id
* some device types. Hence PCI device idx would be 0 for all devices.
* So increment pmu pointer to point to an unused array element.
*/
- if (boot_cpu_data.x86_model == 87)
+ if (boot_cpu_data.x86_model == 87) {
while (pmu->func_id >= 0)
pmu++;
+ }
+
+ if (WARN_ON_ONCE(pmu->boxes[pkg] != NULL))
+ return -EINVAL;
+
+ box = uncore_alloc_box(type, NUMA_NO_NODE);
+ if (!box)
+ return -ENOMEM;
+
if (pmu->func_id < 0)
pmu->func_id = pdev->devfn;
else
WARN_ON_ONCE(pmu->func_id != pdev->devfn);
- box->phys_id = phys_id;
+ atomic_inc(&box->refcnt);
+ box->pci_phys_id = phys_id;
+ box->pkgid = pkg;
box->pci_dev = pdev;
box->pmu = pmu;
uncore_box_init(box);
pci_set_drvdata(pdev, box);
- raw_spin_lock(&uncore_box_lock);
- if (list_empty(&pmu->box_list))
- first_box = true;
- list_add_tail(&box->list, &pmu->box_list);
- raw_spin_unlock(&uncore_box_lock);
+ pmu->boxes[pkg] = box;
+ if (atomic_inc_return(&pmu->activeboxes) > 1)
+ return 0;
- if (first_box)
- uncore_pmu_register(pmu);
- return 0;
+ /* First active box registers the pmu */
+ ret = uncore_pmu_register(pmu);
+ if (ret) {
+ pci_set_drvdata(pdev, NULL);
+ pmu->boxes[pkg] = NULL;
+ uncore_box_exit(box);
+ kfree(box);
+ }
+ return ret;
}
static void uncore_pci_remove(struct pci_dev *pdev)
{
struct intel_uncore_box *box = pci_get_drvdata(pdev);
struct intel_uncore_pmu *pmu;
- int i, cpu, phys_id;
- bool last_box = false;
+ int i, phys_id, pkg;
phys_id = uncore_pcibus_to_physid(pdev->bus);
+ pkg = topology_phys_to_logical_pkg(phys_id);
+
box = pci_get_drvdata(pdev);
if (!box) {
for (i = 0; i < UNCORE_EXTRA_PCI_DEV_MAX; i++) {
- if (uncore_extra_pci_dev[phys_id][i] == pdev) {
- uncore_extra_pci_dev[phys_id][i] = NULL;
+ if (uncore_extra_pci_dev[pkg].dev[i] == pdev) {
+ uncore_extra_pci_dev[pkg].dev[i] = NULL;
break;
}
}
@@ -936,33 +973,20 @@ static void uncore_pci_remove(struct pci_dev *pdev)
}
pmu = box->pmu;
- if (WARN_ON_ONCE(phys_id != box->phys_id))
+ if (WARN_ON_ONCE(phys_id != box->pci_phys_id))
return;
pci_set_drvdata(pdev, NULL);
-
- raw_spin_lock(&uncore_box_lock);
- list_del(&box->list);
- if (list_empty(&pmu->box_list))
- last_box = true;
- raw_spin_unlock(&uncore_box_lock);
-
- for_each_possible_cpu(cpu) {
- if (*per_cpu_ptr(pmu->box, cpu) == box) {
- *per_cpu_ptr(pmu->box, cpu) = NULL;
- atomic_dec(&box->refcnt);
- }
- }
-
- WARN_ON_ONCE(atomic_read(&box->refcnt) != 1);
+ pmu->boxes[pkg] = NULL;
+ if (atomic_dec_return(&pmu->activeboxes) == 0)
+ uncore_pmu_unregister(pmu);
+ uncore_box_exit(box);
kfree(box);
-
- if (last_box)
- perf_pmu_unregister(&pmu->pmu);
}
static int __init uncore_pci_init(void)
{
+ size_t size;
int ret;
switch (boot_cpu_data.x86_model) {
@@ -999,25 +1023,40 @@ static int __init uncore_pci_init(void)
ret = skl_uncore_pci_init();
break;
default:
- return 0;
+ return -ENODEV;
}
if (ret)
return ret;
- ret = uncore_types_init(uncore_pci_uncores);
+ size = max_packages * sizeof(struct pci_extra_dev);
+ uncore_extra_pci_dev = kzalloc(size, GFP_KERNEL);
+ if (!uncore_extra_pci_dev) {
+ ret = -ENOMEM;
+ goto err;
+ }
+
+ ret = uncore_types_init(uncore_pci_uncores, false);
if (ret)
- return ret;
+ goto errtype;
uncore_pci_driver->probe = uncore_pci_probe;
uncore_pci_driver->remove = uncore_pci_remove;
ret = pci_register_driver(uncore_pci_driver);
- if (ret == 0)
- pcidrv_registered = true;
- else
- uncore_types_exit(uncore_pci_uncores);
+ if (ret)
+ goto errtype;
+
+ pcidrv_registered = true;
+ return 0;
+errtype:
+ uncore_types_exit(uncore_pci_uncores);
+ kfree(uncore_extra_pci_dev);
+ uncore_extra_pci_dev = NULL;
+ uncore_free_pcibus_map();
+err:
+ uncore_pci_uncores = empty_uncore;
return ret;
}
@@ -1027,173 +1066,139 @@ static void __init uncore_pci_exit(void)
pcidrv_registered = false;
pci_unregister_driver(uncore_pci_driver);
uncore_types_exit(uncore_pci_uncores);
- }
-}
-
-/* CPU hot plug/unplug are serialized by cpu_add_remove_lock mutex */
-static LIST_HEAD(boxes_to_free);
-
-static void uncore_kfree_boxes(void)
-{
- struct intel_uncore_box *box;
-
- while (!list_empty(&boxes_to_free)) {
- box = list_entry(boxes_to_free.next,
- struct intel_uncore_box, list);
- list_del(&box->list);
- kfree(box);
+ kfree(uncore_extra_pci_dev);
+ uncore_free_pcibus_map();
}
}
static void uncore_cpu_dying(int cpu)
{
- struct intel_uncore_type *type;
+ struct intel_uncore_type *type, **types = uncore_msr_uncores;
struct intel_uncore_pmu *pmu;
struct intel_uncore_box *box;
- int i, j;
-
- for (i = 0; uncore_msr_uncores[i]; i++) {
- type = uncore_msr_uncores[i];
- for (j = 0; j < type->num_boxes; j++) {
- pmu = &type->pmus[j];
- box = *per_cpu_ptr(pmu->box, cpu);
- *per_cpu_ptr(pmu->box, cpu) = NULL;
- if (box && atomic_dec_and_test(&box->refcnt))
- list_add(&box->list, &boxes_to_free);
+ int i, pkg;
+
+ pkg = topology_logical_package_id(cpu);
+ for (; *types; types++) {
+ type = *types;
+ pmu = type->pmus;
+ for (i = 0; i < type->num_boxes; i++, pmu++) {
+ box = pmu->boxes[pkg];
+ if (box && atomic_dec_return(&box->refcnt) == 0)
+ uncore_box_exit(box);
}
}
}
-static int uncore_cpu_starting(int cpu)
+static void uncore_cpu_starting(int cpu, bool init)
{
- struct intel_uncore_type *type;
+ struct intel_uncore_type *type, **types = uncore_msr_uncores;
struct intel_uncore_pmu *pmu;
- struct intel_uncore_box *box, *exist;
- int i, j, k, phys_id;
-
- phys_id = topology_physical_package_id(cpu);
-
- for (i = 0; uncore_msr_uncores[i]; i++) {
- type = uncore_msr_uncores[i];
- for (j = 0; j < type->num_boxes; j++) {
- pmu = &type->pmus[j];
- box = *per_cpu_ptr(pmu->box, cpu);
- /* called by uncore_cpu_init? */
- if (box && box->phys_id >= 0) {
- uncore_box_init(box);
- continue;
- }
+ struct intel_uncore_box *box;
+ int i, pkg, ncpus = 1;
- for_each_online_cpu(k) {
- exist = *per_cpu_ptr(pmu->box, k);
- if (exist && exist->phys_id == phys_id) {
- atomic_inc(&exist->refcnt);
- *per_cpu_ptr(pmu->box, cpu) = exist;
- if (box) {
- list_add(&box->list,
- &boxes_to_free);
- box = NULL;
- }
- break;
- }
- }
+ if (init) {
+ /*
+ * On init we get the number of online cpus in the package
+ * and set refcount for all of them.
+ */
+ ncpus = cpumask_weight(topology_core_cpumask(cpu));
+ }
- if (box) {
- box->phys_id = phys_id;
+ pkg = topology_logical_package_id(cpu);
+ for (; *types; types++) {
+ type = *types;
+ pmu = type->pmus;
+ for (i = 0; i < type->num_boxes; i++, pmu++) {
+ box = pmu->boxes[pkg];
+ if (!box)
+ continue;
+ /* The first cpu on a package activates the box */
+ if (atomic_add_return(ncpus, &box->refcnt) == ncpus)
uncore_box_init(box);
- }
}
}
- return 0;
}
-static int uncore_cpu_prepare(int cpu, int phys_id)
+static int uncore_cpu_prepare(int cpu)
{
- struct intel_uncore_type *type;
+ struct intel_uncore_type *type, **types = uncore_msr_uncores;
struct intel_uncore_pmu *pmu;
struct intel_uncore_box *box;
- int i, j;
-
- for (i = 0; uncore_msr_uncores[i]; i++) {
- type = uncore_msr_uncores[i];
- for (j = 0; j < type->num_boxes; j++) {
- pmu = &type->pmus[j];
- if (pmu->func_id < 0)
- pmu->func_id = j;
-
+ int i, pkg;
+
+ pkg = topology_logical_package_id(cpu);
+ for (; *types; types++) {
+ type = *types;
+ pmu = type->pmus;
+ for (i = 0; i < type->num_boxes; i++, pmu++) {
+ if (pmu->boxes[pkg])
+ continue;
+ /* First cpu of a package allocates the box */
box = uncore_alloc_box(type, cpu_to_node(cpu));
if (!box)
return -ENOMEM;
-
box->pmu = pmu;
- box->phys_id = phys_id;
- *per_cpu_ptr(pmu->box, cpu) = box;
+ box->pkgid = pkg;
+ pmu->boxes[pkg] = box;
}
}
return 0;
}
-static void
-uncore_change_context(struct intel_uncore_type **uncores, int old_cpu, int new_cpu)
+static void uncore_change_type_ctx(struct intel_uncore_type *type, int old_cpu,
+ int new_cpu)
{
- struct intel_uncore_type *type;
- struct intel_uncore_pmu *pmu;
+ struct intel_uncore_pmu *pmu = type->pmus;
struct intel_uncore_box *box;
- int i, j;
+ int i, pkg;
- for (i = 0; uncores[i]; i++) {
- type = uncores[i];
- for (j = 0; j < type->num_boxes; j++) {
- pmu = &type->pmus[j];
- if (old_cpu < 0)
- box = uncore_pmu_to_box(pmu, new_cpu);
- else
- box = uncore_pmu_to_box(pmu, old_cpu);
- if (!box)
- continue;
-
- if (old_cpu < 0) {
- WARN_ON_ONCE(box->cpu != -1);
- box->cpu = new_cpu;
- continue;
- }
+ pkg = topology_logical_package_id(old_cpu < 0 ? new_cpu : old_cpu);
+ for (i = 0; i < type->num_boxes; i++, pmu++) {
+ box = pmu->boxes[pkg];
+ if (!box)
+ continue;
- WARN_ON_ONCE(box->cpu != old_cpu);
- if (new_cpu >= 0) {
- uncore_pmu_cancel_hrtimer(box);
- perf_pmu_migrate_context(&pmu->pmu,
- old_cpu, new_cpu);
- box->cpu = new_cpu;
- } else {
- box->cpu = -1;
- }
+ if (old_cpu < 0) {
+ WARN_ON_ONCE(box->cpu != -1);
+ box->cpu = new_cpu;
+ continue;
}
+
+ WARN_ON_ONCE(box->cpu != old_cpu);
+ box->cpu = -1;
+ if (new_cpu < 0)
+ continue;
+
+ uncore_pmu_cancel_hrtimer(box);
+ perf_pmu_migrate_context(&pmu->pmu, old_cpu, new_cpu);
+ box->cpu = new_cpu;
}
}
+static void uncore_change_context(struct intel_uncore_type **uncores,
+ int old_cpu, int new_cpu)
+{
+ for (; *uncores; uncores++)
+ uncore_change_type_ctx(*uncores, old_cpu, new_cpu);
+}
+
static void uncore_event_exit_cpu(int cpu)
{
- int i, phys_id, target;
+ int target;
- /* if exiting cpu is used for collecting uncore events */
+ /* Check if exiting cpu is used for collecting uncore events */
if (!cpumask_test_and_clear_cpu(cpu, &uncore_cpu_mask))
return;
- /* find a new cpu to collect uncore events */
- phys_id = topology_physical_package_id(cpu);
- target = -1;
- for_each_online_cpu(i) {
- if (i == cpu)
- continue;
- if (phys_id == topology_physical_package_id(i)) {
- target = i;
- break;
- }
- }
+ /* Find a new cpu to collect uncore events */
+ target = cpumask_any_but(topology_core_cpumask(cpu), cpu);
- /* migrate uncore events to the new cpu */
- if (target >= 0)
+ /* Migrate uncore events to the new target */
+ if (target < nr_cpu_ids)
cpumask_set_cpu(target, &uncore_cpu_mask);
+ else
+ target = -1;
uncore_change_context(uncore_msr_uncores, cpu, target);
uncore_change_context(uncore_pci_uncores, cpu, target);
@@ -1201,13 +1206,15 @@ static void uncore_event_exit_cpu(int cpu)
static void uncore_event_init_cpu(int cpu)
{
- int i, phys_id;
+ int target;
- phys_id = topology_physical_package_id(cpu);
- for_each_cpu(i, &uncore_cpu_mask) {
- if (phys_id == topology_physical_package_id(i))
- return;
- }
+ /*
+ * Check if there is an online cpu in the package
+ * which collects uncore events already.
+ */
+ target = cpumask_any_and(&uncore_cpu_mask, topology_core_cpumask(cpu));
+ if (target < nr_cpu_ids)
+ return;
cpumask_set_cpu(cpu, &uncore_cpu_mask);
@@ -1220,39 +1227,25 @@ static int uncore_cpu_notifier(struct notifier_block *self,
{
unsigned int cpu = (long)hcpu;
- /* allocate/free data structure for uncore box */
switch (action & ~CPU_TASKS_FROZEN) {
case CPU_UP_PREPARE:
- uncore_cpu_prepare(cpu, -1);
- break;
+ return notifier_from_errno(uncore_cpu_prepare(cpu));
+
case CPU_STARTING:
- uncore_cpu_starting(cpu);
+ uncore_cpu_starting(cpu, false);
+ case CPU_DOWN_FAILED:
+ uncore_event_init_cpu(cpu);
break;
+
case CPU_UP_CANCELED:
case CPU_DYING:
uncore_cpu_dying(cpu);
break;
- case CPU_ONLINE:
- case CPU_DEAD:
- uncore_kfree_boxes();
- break;
- default:
- break;
- }
- /* select the cpu that collects uncore events */
- switch (action & ~CPU_TASKS_FROZEN) {
- case CPU_DOWN_FAILED:
- case CPU_STARTING:
- uncore_event_init_cpu(cpu);
- break;
case CPU_DOWN_PREPARE:
uncore_event_exit_cpu(cpu);
break;
- default:
- break;
}
-
return NOTIFY_OK;
}
@@ -1265,9 +1258,29 @@ static struct notifier_block uncore_cpu_nb = {
.priority = CPU_PRI_PERF + 1,
};
-static void __init uncore_cpu_setup(void *dummy)
+static int __init type_pmu_register(struct intel_uncore_type *type)
{
- uncore_cpu_starting(smp_processor_id());
+ int i, ret;
+
+ for (i = 0; i < type->num_boxes; i++) {
+ ret = uncore_pmu_register(&type->pmus[i]);
+ if (ret)
+ return ret;
+ }
+ return 0;
+}
+
+static int __init uncore_msr_pmus_register(void)
+{
+ struct intel_uncore_type **types = uncore_msr_uncores;
+ int ret;
+
+ for (; *types; types++) {
+ ret = type_pmu_register(*types);
+ if (ret)
+ return ret;
+ }
+ return 0;
}
static int __init uncore_cpu_init(void)
@@ -1311,71 +1324,61 @@ static int __init uncore_cpu_init(void)
knl_uncore_cpu_init();
break;
default:
- return 0;
+ return -ENODEV;
}
- ret = uncore_types_init(uncore_msr_uncores);
+ ret = uncore_types_init(uncore_msr_uncores, true);
if (ret)
- return ret;
+ goto err;
+ ret = uncore_msr_pmus_register();
+ if (ret)
+ goto err;
return 0;
+err:
+ uncore_types_exit(uncore_msr_uncores);
+ uncore_msr_uncores = empty_uncore;
+ return ret;
}
-static int __init uncore_pmus_register(void)
+static void __init uncore_cpu_setup(void *dummy)
{
- struct intel_uncore_pmu *pmu;
- struct intel_uncore_type *type;
- int i, j;
-
- for (i = 0; uncore_msr_uncores[i]; i++) {
- type = uncore_msr_uncores[i];
- for (j = 0; j < type->num_boxes; j++) {
- pmu = &type->pmus[j];
- uncore_pmu_register(pmu);
- }
- }
-
- return 0;
+ uncore_cpu_starting(smp_processor_id(), true);
}
-static void __init uncore_cpumask_init(void)
-{
- int cpu;
-
- /*
- * ony invoke once from msr or pci init code
- */
- if (!cpumask_empty(&uncore_cpu_mask))
- return;
+/* Lazy to avoid allocation of a few bytes for the normal case */
+static __initdata DECLARE_BITMAP(packages, MAX_LOCAL_APIC);
- cpu_notifier_register_begin();
+static int __init uncore_cpumask_init(bool msr)
+{
+ unsigned int cpu;
for_each_online_cpu(cpu) {
- int i, phys_id = topology_physical_package_id(cpu);
+ unsigned int pkg = topology_logical_package_id(cpu);
+ int ret;
- for_each_cpu(i, &uncore_cpu_mask) {
- if (phys_id == topology_physical_package_id(i)) {
- phys_id = -1;
- break;
- }
- }
- if (phys_id < 0)
+ if (test_and_set_bit(pkg, packages))
continue;
-
- uncore_cpu_prepare(cpu, phys_id);
+ /*
+ * The first online cpu of each package allocates and takes
+ * the refcounts for all other online cpus in that package.
+ * If msrs are not enabled no allocation is required.
+ */
+ if (msr) {
+ ret = uncore_cpu_prepare(cpu);
+ if (ret)
+ return ret;
+ }
uncore_event_init_cpu(cpu);
+ smp_call_function_single(cpu, uncore_cpu_setup, NULL, 1);
}
- on_each_cpu(uncore_cpu_setup, NULL, 1);
-
__register_cpu_notifier(&uncore_cpu_nb);
-
- cpu_notifier_register_done();
+ return 0;
}
-
static int __init intel_uncore_init(void)
{
- int ret;
+ int pret, cret, ret;
if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL)
return -ENODEV;
@@ -1383,19 +1386,27 @@ static int __init intel_uncore_init(void)
if (cpu_has_hypervisor)
return -ENODEV;
- ret = uncore_pci_init();
- if (ret)
- goto fail;
- ret = uncore_cpu_init();
- if (ret) {
- uncore_pci_exit();
- goto fail;
- }
- uncore_cpumask_init();
+ max_packages = topology_max_packages();
+
+ pret = uncore_pci_init();
+ cret = uncore_cpu_init();
- uncore_pmus_register();
+ if (cret && pret)
+ return -ENODEV;
+
+ cpu_notifier_register_begin();
+ ret = uncore_cpumask_init(!cret);
+ if (ret)
+ goto err;
+ cpu_notifier_register_done();
return 0;
-fail:
+
+err:
+ /* Undo box->init_box() */
+ on_each_cpu_mask(&uncore_cpu_mask, uncore_exit_boxes, NULL, 1);
+ uncore_types_exit(uncore_msr_uncores);
+ uncore_pci_exit();
+ cpu_notifier_register_done();
return ret;
}
device_initcall(intel_uncore_init);
diff --git a/arch/x86/kernel/cpu/perf_event_intel_uncore.h b/arch/x86/events/intel/uncore.h
similarity index 90%
rename from arch/x86/kernel/cpu/perf_event_intel_uncore.h
rename to arch/x86/events/intel/uncore.h
index a7086b8621566..79766b9a35809 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_uncore.h
+++ b/arch/x86/events/intel/uncore.h
@@ -1,8 +1,10 @@
#include
#include
#include
+#include
+
#include
-#include "perf_event.h"
+#include "../perf_event.h"
#define UNCORE_PMU_NAME_LEN 32
#define UNCORE_PMU_HRTIMER_INTERVAL (60LL * NSEC_PER_SEC)
@@ -19,11 +21,12 @@
#define UNCORE_EXTRA_PCI_DEV 0xff
#define UNCORE_EXTRA_PCI_DEV_MAX 3
-/* support up to 8 sockets */
-#define UNCORE_SOCKET_MAX 8
-
#define UNCORE_EVENT_CONSTRAINT(c, n) EVENT_CONSTRAINT(c, n, 0xff)
+struct pci_extra_dev {
+ struct pci_dev *dev[UNCORE_EXTRA_PCI_DEV_MAX];
+};
+
struct intel_uncore_ops;
struct intel_uncore_pmu;
struct intel_uncore_box;
@@ -61,6 +64,7 @@ struct intel_uncore_type {
struct intel_uncore_ops {
void (*init_box)(struct intel_uncore_box *);
+ void (*exit_box)(struct intel_uncore_box *);
void (*disable_box)(struct intel_uncore_box *);
void (*enable_box)(struct intel_uncore_box *);
void (*disable_event)(struct intel_uncore_box *, struct perf_event *);
@@ -73,13 +77,14 @@ struct intel_uncore_ops {
};
struct intel_uncore_pmu {
- struct pmu pmu;
- char name[UNCORE_PMU_NAME_LEN];
- int pmu_idx;
- int func_id;
- struct intel_uncore_type *type;
- struct intel_uncore_box ** __percpu box;
- struct list_head box_list;
+ struct pmu pmu;
+ char name[UNCORE_PMU_NAME_LEN];
+ int pmu_idx;
+ int func_id;
+ bool registered;
+ atomic_t activeboxes;
+ struct intel_uncore_type *type;
+ struct intel_uncore_box **boxes;
};
struct intel_uncore_extra_reg {
@@ -89,7 +94,8 @@ struct intel_uncore_extra_reg {
};
struct intel_uncore_box {
- int phys_id;
+ int pci_phys_id;
+ int pkgid;
int n_active; /* number of active events */
int n_events;
int cpu; /* cpu to collect events */
@@ -123,7 +129,6 @@ struct pci2phy_map {
int pbus_to_physid[256];
};
-int uncore_pcibus_to_physid(struct pci_bus *bus);
struct pci2phy_map *__find_pci2phy_map(int segment);
ssize_t uncore_event_show(struct kobject *kobj,
@@ -305,14 +310,30 @@ static inline void uncore_box_init(struct intel_uncore_box *box)
}
}
+static inline void uncore_box_exit(struct intel_uncore_box *box)
+{
+ if (test_and_clear_bit(UNCORE_BOX_FLAG_INITIATED, &box->flags)) {
+ if (box->pmu->type->ops->exit_box)
+ box->pmu->type->ops->exit_box(box);
+ }
+}
+
static inline bool uncore_box_is_fake(struct intel_uncore_box *box)
{
- return (box->phys_id < 0);
+ return (box->pkgid < 0);
+}
+
+static inline struct intel_uncore_pmu *uncore_event_to_pmu(struct perf_event *event)
+{
+ return container_of(event->pmu, struct intel_uncore_pmu, pmu);
+}
+
+static inline struct intel_uncore_box *uncore_event_to_box(struct perf_event *event)
+{
+ return event->pmu_private;
}
-struct intel_uncore_pmu *uncore_event_to_pmu(struct perf_event *event);
struct intel_uncore_box *uncore_pmu_to_box(struct intel_uncore_pmu *pmu, int cpu);
-struct intel_uncore_box *uncore_event_to_box(struct perf_event *event);
u64 uncore_msr_read_counter(struct intel_uncore_box *box, struct perf_event *event);
void uncore_pmu_start_hrtimer(struct intel_uncore_box *box);
void uncore_pmu_cancel_hrtimer(struct intel_uncore_box *box);
@@ -328,7 +349,7 @@ extern struct intel_uncore_type **uncore_pci_uncores;
extern struct pci_driver *uncore_pci_driver;
extern raw_spinlock_t pci2phy_map_lock;
extern struct list_head pci2phy_map_head;
-extern struct pci_dev *uncore_extra_pci_dev[UNCORE_SOCKET_MAX][UNCORE_EXTRA_PCI_DEV_MAX];
+extern struct pci_extra_dev *uncore_extra_pci_dev;
extern struct event_constraint uncore_constraint_empty;
/* perf_event_intel_uncore_snb.c */
diff --git a/arch/x86/kernel/cpu/perf_event_intel_uncore_nhmex.c b/arch/x86/events/intel/uncore_nhmex.c
similarity index 99%
rename from arch/x86/kernel/cpu/perf_event_intel_uncore_nhmex.c
rename to arch/x86/events/intel/uncore_nhmex.c
index 2749965afed0b..cda5693320050 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_uncore_nhmex.c
+++ b/arch/x86/events/intel/uncore_nhmex.c
@@ -1,5 +1,5 @@
/* Nehalem-EX/Westmere-EX uncore support */
-#include "perf_event_intel_uncore.h"
+#include "uncore.h"
/* NHM-EX event control */
#define NHMEX_PMON_CTL_EV_SEL_MASK 0x000000ff
@@ -201,6 +201,11 @@ static void nhmex_uncore_msr_init_box(struct intel_uncore_box *box)
wrmsrl(NHMEX_U_MSR_PMON_GLOBAL_CTL, NHMEX_U_PMON_GLOBAL_EN_ALL);
}
+static void nhmex_uncore_msr_exit_box(struct intel_uncore_box *box)
+{
+ wrmsrl(NHMEX_U_MSR_PMON_GLOBAL_CTL, 0);
+}
+
static void nhmex_uncore_msr_disable_box(struct intel_uncore_box *box)
{
unsigned msr = uncore_msr_box_ctl(box);
@@ -250,6 +255,7 @@ static void nhmex_uncore_msr_enable_event(struct intel_uncore_box *box, struct p
#define NHMEX_UNCORE_OPS_COMMON_INIT() \
.init_box = nhmex_uncore_msr_init_box, \
+ .exit_box = nhmex_uncore_msr_exit_box, \
.disable_box = nhmex_uncore_msr_disable_box, \
.enable_box = nhmex_uncore_msr_enable_box, \
.disable_event = nhmex_uncore_msr_disable_event, \
diff --git a/arch/x86/kernel/cpu/perf_event_intel_uncore_snb.c b/arch/x86/events/intel/uncore_snb.c
similarity index 98%
rename from arch/x86/kernel/cpu/perf_event_intel_uncore_snb.c
rename to arch/x86/events/intel/uncore_snb.c
index 2bd030ddd0db3..96531d2b843fc 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_uncore_snb.c
+++ b/arch/x86/events/intel/uncore_snb.c
@@ -1,5 +1,5 @@
/* Nehalem/SandBridge/Haswell uncore support */
-#include "perf_event_intel_uncore.h"
+#include "uncore.h"
/* Uncore IMC PCI IDs */
#define PCI_DEVICE_ID_INTEL_SNB_IMC 0x0100
@@ -95,6 +95,12 @@ static void snb_uncore_msr_init_box(struct intel_uncore_box *box)
}
}
+static void snb_uncore_msr_exit_box(struct intel_uncore_box *box)
+{
+ if (box->pmu->pmu_idx == 0)
+ wrmsrl(SNB_UNC_PERF_GLOBAL_CTL, 0);
+}
+
static struct uncore_event_desc snb_uncore_events[] = {
INTEL_UNCORE_EVENT_DESC(clockticks, "event=0xff,umask=0x00"),
{ /* end: all zeroes */ },
@@ -116,6 +122,7 @@ static struct attribute_group snb_uncore_format_group = {
static struct intel_uncore_ops snb_uncore_msr_ops = {
.init_box = snb_uncore_msr_init_box,
+ .exit_box = snb_uncore_msr_exit_box,
.disable_event = snb_uncore_msr_disable_event,
.enable_event = snb_uncore_msr_enable_event,
.read_counter = uncore_msr_read_counter,
@@ -231,6 +238,11 @@ static void snb_uncore_imc_init_box(struct intel_uncore_box *box)
box->hrtimer_duration = UNCORE_SNB_IMC_HRTIMER_INTERVAL;
}
+static void snb_uncore_imc_exit_box(struct intel_uncore_box *box)
+{
+ iounmap(box->io_addr);
+}
+
static void snb_uncore_imc_enable_box(struct intel_uncore_box *box)
{}
@@ -301,6 +313,7 @@ static int snb_uncore_imc_event_init(struct perf_event *event)
return -EINVAL;
event->cpu = box->cpu;
+ event->pmu_private = box;
event->hw.idx = -1;
event->hw.last_tag = ~0ULL;
@@ -458,6 +471,7 @@ static struct pmu snb_uncore_imc_pmu = {
static struct intel_uncore_ops snb_uncore_imc_ops = {
.init_box = snb_uncore_imc_init_box,
+ .exit_box = snb_uncore_imc_exit_box,
.enable_box = snb_uncore_imc_enable_box,
.disable_box = snb_uncore_imc_disable_box,
.disable_event = snb_uncore_imc_disable_event,
diff --git a/arch/x86/kernel/cpu/perf_event_intel_uncore_snbep.c b/arch/x86/events/intel/uncore_snbep.c
similarity index 99%
rename from arch/x86/kernel/cpu/perf_event_intel_uncore_snbep.c
rename to arch/x86/events/intel/uncore_snbep.c
index 33acb884ccf1b..93f6bd9bf7610 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_uncore_snbep.c
+++ b/arch/x86/events/intel/uncore_snbep.c
@@ -1,6 +1,5 @@
/* SandyBridge-EP/IvyTown uncore support */
-#include "perf_event_intel_uncore.h"
-
+#include "uncore.h"
/* SNB-EP Box level control */
#define SNBEP_PMON_BOX_CTL_RST_CTRL (1 << 0)
@@ -987,7 +986,9 @@ static void snbep_qpi_enable_event(struct intel_uncore_box *box, struct perf_eve
if (reg1->idx != EXTRA_REG_NONE) {
int idx = box->pmu->pmu_idx + SNBEP_PCI_QPI_PORT0_FILTER;
- struct pci_dev *filter_pdev = uncore_extra_pci_dev[box->phys_id][idx];
+ int pkg = topology_phys_to_logical_pkg(box->pci_phys_id);
+ struct pci_dev *filter_pdev = uncore_extra_pci_dev[pkg].dev[idx];
+
if (filter_pdev) {
pci_write_config_dword(filter_pdev, reg1->reg,
(u32)reg1->config);
@@ -2521,14 +2522,16 @@ static struct intel_uncore_type *hswep_msr_uncores[] = {
void hswep_uncore_cpu_init(void)
{
+ int pkg = topology_phys_to_logical_pkg(0);
+
if (hswep_uncore_cbox.num_boxes > boot_cpu_data.x86_max_cores)
hswep_uncore_cbox.num_boxes = boot_cpu_data.x86_max_cores;
/* Detect 6-8 core systems with only two SBOXes */
- if (uncore_extra_pci_dev[0][HSWEP_PCI_PCU_3]) {
+ if (uncore_extra_pci_dev[pkg].dev[HSWEP_PCI_PCU_3]) {
u32 capid4;
- pci_read_config_dword(uncore_extra_pci_dev[0][HSWEP_PCI_PCU_3],
+ pci_read_config_dword(uncore_extra_pci_dev[pkg].dev[HSWEP_PCI_PCU_3],
0x94, &capid4);
if (((capid4 >> 6) & 0x3) == 0)
hswep_uncore_sbox.num_boxes = 2;
@@ -2875,11 +2878,13 @@ static struct intel_uncore_type bdx_uncore_sbox = {
.format_group = &hswep_uncore_sbox_format_group,
};
+#define BDX_MSR_UNCORE_SBOX 3
+
static struct intel_uncore_type *bdx_msr_uncores[] = {
&bdx_uncore_ubox,
&bdx_uncore_cbox,
- &bdx_uncore_sbox,
&hswep_uncore_pcu,
+ &bdx_uncore_sbox,
NULL,
};
@@ -2888,6 +2893,10 @@ void bdx_uncore_cpu_init(void)
if (bdx_uncore_cbox.num_boxes > boot_cpu_data.x86_max_cores)
bdx_uncore_cbox.num_boxes = boot_cpu_data.x86_max_cores;
uncore_msr_uncores = bdx_msr_uncores;
+
+ /* BDX-DE doesn't have SBOX */
+ if (boot_cpu_data.x86_model == 86)
+ uncore_msr_uncores[BDX_MSR_UNCORE_SBOX] = NULL;
}
static struct intel_uncore_type bdx_uncore_ha = {
diff --git a/arch/x86/kernel/cpu/perf_event_msr.c b/arch/x86/events/msr.c
similarity index 100%
rename from arch/x86/kernel/cpu/perf_event_msr.c
rename to arch/x86/events/msr.c
diff --git a/arch/x86/kernel/cpu/perf_event.h b/arch/x86/events/perf_event.h
similarity index 99%
rename from arch/x86/kernel/cpu/perf_event.h
rename to arch/x86/events/perf_event.h
index 7bb61e32fb29f..68155cafa8a13 100644
--- a/arch/x86/kernel/cpu/perf_event.h
+++ b/arch/x86/events/perf_event.h
@@ -586,6 +586,7 @@ struct x86_pmu {
pebs_broken :1,
pebs_prec_dist :1;
int pebs_record_size;
+ int pebs_buffer_size;
void (*drain_pebs)(struct pt_regs *regs);
struct event_constraint *pebs_constraints;
void (*pebs_aliases)(struct perf_event *event);
@@ -860,6 +861,8 @@ extern struct event_constraint intel_ivb_pebs_event_constraints[];
extern struct event_constraint intel_hsw_pebs_event_constraints[];
+extern struct event_constraint intel_bdw_pebs_event_constraints[];
+
extern struct event_constraint intel_skl_pebs_event_constraints[];
struct event_constraint *intel_pebs_constraints(struct perf_event *event);
@@ -904,6 +907,8 @@ void intel_pmu_lbr_init_skl(void);
void intel_pmu_lbr_init_knl(void);
+void intel_pmu_pebs_data_source_nhm(void);
+
int intel_pmu_setup_lbr_filter(struct perf_event *event);
void intel_pt_interrupt(void);
diff --git a/arch/x86/include/asm/alternative.h b/arch/x86/include/asm/alternative.h
index 7bfc85bbb8ffc..99afb665a004c 100644
--- a/arch/x86/include/asm/alternative.h
+++ b/arch/x86/include/asm/alternative.h
@@ -151,12 +151,6 @@ static inline int alternatives_text_reserved(void *start, void *end)
ALTINSTR_REPLACEMENT(newinstr2, feature2, 2) \
".popsection"
-/*
- * This must be included *after* the definition of ALTERNATIVE due to
- *
- */
-#include
-
/*
* Alternative instructions for different CPU types or capabilities.
*
diff --git a/arch/x86/include/asm/amd_nb.h b/arch/x86/include/asm/amd_nb.h
index 3c56ef1ae068e..5e828da2e18f6 100644
--- a/arch/x86/include/asm/amd_nb.h
+++ b/arch/x86/include/asm/amd_nb.h
@@ -27,15 +27,23 @@ struct amd_l3_cache {
};
struct threshold_block {
- unsigned int block;
- unsigned int bank;
- unsigned int cpu;
- u32 address;
- u16 interrupt_enable;
- bool interrupt_capable;
- u16 threshold_limit;
- struct kobject kobj;
- struct list_head miscj;
+ unsigned int block; /* Number within bank */
+ unsigned int bank; /* MCA bank the block belongs to */
+ unsigned int cpu; /* CPU which controls MCA bank */
+ u32 address; /* MSR address for the block */
+ u16 interrupt_enable; /* Enable/Disable APIC interrupt */
+ bool interrupt_capable; /* Bank can generate an interrupt. */
+
+ u16 threshold_limit; /*
+ * Value upon which threshold
+ * interrupt is generated.
+ */
+
+ struct kobject kobj; /* sysfs object */
+ struct list_head miscj; /*
+ * List of threshold blocks
+ * within a bank.
+ */
};
struct threshold_bank {
diff --git a/arch/x86/include/asm/apic.h b/arch/x86/include/asm/apic.h
index c80f6b6f3da22..0899cfc8dfe8f 100644
--- a/arch/x86/include/asm/apic.h
+++ b/arch/x86/include/asm/apic.h
@@ -6,7 +6,6 @@
#include
#include
-#include
#include
#include
#include
diff --git a/arch/x86/include/asm/arch_hweight.h b/arch/x86/include/asm/arch_hweight.h
index 259a7c1ef7099..02e799fa43d1b 100644
--- a/arch/x86/include/asm/arch_hweight.h
+++ b/arch/x86/include/asm/arch_hweight.h
@@ -1,6 +1,8 @@
#ifndef _ASM_X86_HWEIGHT_H
#define _ASM_X86_HWEIGHT_H
+#include
+
#ifdef CONFIG_64BIT
/* popcnt %edi, %eax -- redundant REX prefix for alignment */
#define POPCNT32 ".byte 0xf3,0x40,0x0f,0xb8,0xc7"
diff --git a/arch/x86/include/asm/asm.h b/arch/x86/include/asm/asm.h
index 189679aba7035..f5063b6659eb3 100644
--- a/arch/x86/include/asm/asm.h
+++ b/arch/x86/include/asm/asm.h
@@ -44,19 +44,22 @@
/* Exception table entry */
#ifdef __ASSEMBLY__
-# define _ASM_EXTABLE(from,to) \
+# define _ASM_EXTABLE_HANDLE(from, to, handler) \
.pushsection "__ex_table","a" ; \
- .balign 8 ; \
+ .balign 4 ; \
.long (from) - . ; \
.long (to) - . ; \
+ .long (handler) - . ; \
.popsection
-# define _ASM_EXTABLE_EX(from,to) \
- .pushsection "__ex_table","a" ; \
- .balign 8 ; \
- .long (from) - . ; \
- .long (to) - . + 0x7ffffff0 ; \
- .popsection
+# define _ASM_EXTABLE(from, to) \
+ _ASM_EXTABLE_HANDLE(from, to, ex_handler_default)
+
+# define _ASM_EXTABLE_FAULT(from, to) \
+ _ASM_EXTABLE_HANDLE(from, to, ex_handler_fault)
+
+# define _ASM_EXTABLE_EX(from, to) \
+ _ASM_EXTABLE_HANDLE(from, to, ex_handler_ext)
# define _ASM_NOKPROBE(entry) \
.pushsection "_kprobe_blacklist","aw" ; \
@@ -89,19 +92,24 @@
.endm
#else
-# define _ASM_EXTABLE(from,to) \
+# define _EXPAND_EXTABLE_HANDLE(x) #x
+# define _ASM_EXTABLE_HANDLE(from, to, handler) \
" .pushsection \"__ex_table\",\"a\"\n" \
- " .balign 8\n" \
+ " .balign 4\n" \
" .long (" #from ") - .\n" \
" .long (" #to ") - .\n" \
+ " .long (" _EXPAND_EXTABLE_HANDLE(handler) ") - .\n" \
" .popsection\n"
-# define _ASM_EXTABLE_EX(from,to) \
- " .pushsection \"__ex_table\",\"a\"\n" \
- " .balign 8\n" \
- " .long (" #from ") - .\n" \
- " .long (" #to ") - . + 0x7ffffff0\n" \
- " .popsection\n"
+# define _ASM_EXTABLE(from, to) \
+ _ASM_EXTABLE_HANDLE(from, to, ex_handler_default)
+
+# define _ASM_EXTABLE_FAULT(from, to) \
+ _ASM_EXTABLE_HANDLE(from, to, ex_handler_fault)
+
+# define _ASM_EXTABLE_EX(from, to) \
+ _ASM_EXTABLE_HANDLE(from, to, ex_handler_ext)
+
/* For C file, we already have NOKPROBE_SYMBOL macro */
#endif
diff --git a/arch/x86/include/asm/barrier.h b/arch/x86/include/asm/barrier.h
index a584e1c509184..bfb28caf97b1b 100644
--- a/arch/x86/include/asm/barrier.h
+++ b/arch/x86/include/asm/barrier.h
@@ -6,18 +6,17 @@
/*
* Force strict CPU ordering.
- * And yes, this is required on UP too when we're talking
+ * And yes, this might be required on UP too when we're talking
* to devices.
*/
#ifdef CONFIG_X86_32
-/*
- * Some non-Intel clones support out of order store. wmb() ceases to be a
- * nop for these.
- */
-#define mb() alternative("lock; addl $0,0(%%esp)", "mfence", X86_FEATURE_XMM2)
-#define rmb() alternative("lock; addl $0,0(%%esp)", "lfence", X86_FEATURE_XMM2)
-#define wmb() alternative("lock; addl $0,0(%%esp)", "sfence", X86_FEATURE_XMM)
+#define mb() asm volatile(ALTERNATIVE("lock; addl $0,0(%%esp)", "mfence", \
+ X86_FEATURE_XMM2) ::: "memory", "cc")
+#define rmb() asm volatile(ALTERNATIVE("lock; addl $0,0(%%esp)", "lfence", \
+ X86_FEATURE_XMM2) ::: "memory", "cc")
+#define wmb() asm volatile(ALTERNATIVE("lock; addl $0,0(%%esp)", "sfence", \
+ X86_FEATURE_XMM2) ::: "memory", "cc")
#else
#define mb() asm volatile("mfence":::"memory")
#define rmb() asm volatile("lfence":::"memory")
diff --git a/arch/x86/include/asm/bitops.h b/arch/x86/include/asm/bitops.h
index cfe3b954d5e41..7766d1cf096e8 100644
--- a/arch/x86/include/asm/bitops.h
+++ b/arch/x86/include/asm/bitops.h
@@ -91,7 +91,7 @@ set_bit(long nr, volatile unsigned long *addr)
* If it's called on the same region of memory simultaneously, the effect
* may be that only one operation succeeds.
*/
-static inline void __set_bit(long nr, volatile unsigned long *addr)
+static __always_inline void __set_bit(long nr, volatile unsigned long *addr)
{
asm volatile("bts %1,%0" : ADDR : "Ir" (nr) : "memory");
}
@@ -128,13 +128,13 @@ clear_bit(long nr, volatile unsigned long *addr)
* clear_bit() is atomic and implies release semantics before the memory
* operation. It can be used for an unlock.
*/
-static inline void clear_bit_unlock(long nr, volatile unsigned long *addr)
+static __always_inline void clear_bit_unlock(long nr, volatile unsigned long *addr)
{
barrier();
clear_bit(nr, addr);
}
-static inline void __clear_bit(long nr, volatile unsigned long *addr)
+static __always_inline void __clear_bit(long nr, volatile unsigned long *addr)
{
asm volatile("btr %1,%0" : ADDR : "Ir" (nr));
}
@@ -151,7 +151,7 @@ static inline void __clear_bit(long nr, volatile unsigned long *addr)
* No memory barrier is required here, because x86 cannot reorder stores past
* older loads. Same principle as spin_unlock.
*/
-static inline void __clear_bit_unlock(long nr, volatile unsigned long *addr)
+static __always_inline void __clear_bit_unlock(long nr, volatile unsigned long *addr)
{
barrier();
__clear_bit(nr, addr);
@@ -166,7 +166,7 @@ static inline void __clear_bit_unlock(long nr, volatile unsigned long *addr)
* If it's called on the same region of memory simultaneously, the effect
* may be that only one operation succeeds.
*/
-static inline void __change_bit(long nr, volatile unsigned long *addr)
+static __always_inline void __change_bit(long nr, volatile unsigned long *addr)
{
asm volatile("btc %1,%0" : ADDR : "Ir" (nr));
}
@@ -180,7 +180,7 @@ static inline void __change_bit(long nr, volatile unsigned long *addr)
* Note that @nr may be almost arbitrarily large; this function is not
* restricted to acting on a single-word quantity.
*/
-static inline void change_bit(long nr, volatile unsigned long *addr)
+static __always_inline void change_bit(long nr, volatile unsigned long *addr)
{
if (IS_IMMEDIATE(nr)) {
asm volatile(LOCK_PREFIX "xorb %1,%0"
@@ -201,7 +201,7 @@ static inline void change_bit(long nr, volatile unsigned long *addr)
* This operation is atomic and cannot be reordered.
* It also implies a memory barrier.
*/
-static inline int test_and_set_bit(long nr, volatile unsigned long *addr)
+static __always_inline int test_and_set_bit(long nr, volatile unsigned long *addr)
{
GEN_BINARY_RMWcc(LOCK_PREFIX "bts", *addr, "Ir", nr, "%0", "c");
}
@@ -228,7 +228,7 @@ test_and_set_bit_lock(long nr, volatile unsigned long *addr)
* If two examples of this operation race, one can appear to succeed
* but actually fail. You must protect multiple accesses with a lock.
*/
-static inline int __test_and_set_bit(long nr, volatile unsigned long *addr)
+static __always_inline int __test_and_set_bit(long nr, volatile unsigned long *addr)
{
int oldbit;
@@ -247,7 +247,7 @@ static inline int __test_and_set_bit(long nr, volatile unsigned long *addr)
* This operation is atomic and cannot be reordered.
* It also implies a memory barrier.
*/
-static inline int test_and_clear_bit(long nr, volatile unsigned long *addr)
+static __always_inline int test_and_clear_bit(long nr, volatile unsigned long *addr)
{
GEN_BINARY_RMWcc(LOCK_PREFIX "btr", *addr, "Ir", nr, "%0", "c");
}
@@ -268,7 +268,7 @@ static inline int test_and_clear_bit(long nr, volatile unsigned long *addr)
* accessed from a hypervisor on the same CPU if running in a VM: don't change
* this without also updating arch/x86/kernel/kvm.c
*/
-static inline int __test_and_clear_bit(long nr, volatile unsigned long *addr)
+static __always_inline int __test_and_clear_bit(long nr, volatile unsigned long *addr)
{
int oldbit;
@@ -280,7 +280,7 @@ static inline int __test_and_clear_bit(long nr, volatile unsigned long *addr)
}
/* WARNING: non atomic and it can be reordered! */
-static inline int __test_and_change_bit(long nr, volatile unsigned long *addr)
+static __always_inline int __test_and_change_bit(long nr, volatile unsigned long *addr)
{
int oldbit;
@@ -300,7 +300,7 @@ static inline int __test_and_change_bit(long nr, volatile unsigned long *addr)
* This operation is atomic and cannot be reordered.
* It also implies a memory barrier.
*/
-static inline int test_and_change_bit(long nr, volatile unsigned long *addr)
+static __always_inline int test_and_change_bit(long nr, volatile unsigned long *addr)
{
GEN_BINARY_RMWcc(LOCK_PREFIX "btc", *addr, "Ir", nr, "%0", "c");
}
@@ -311,7 +311,7 @@ static __always_inline int constant_test_bit(long nr, const volatile unsigned lo
(addr[nr >> _BITOPS_LONG_SHIFT])) != 0;
}
-static inline int variable_test_bit(long nr, volatile const unsigned long *addr)
+static __always_inline int variable_test_bit(long nr, volatile const unsigned long *addr)
{
int oldbit;
@@ -343,7 +343,7 @@ static int test_bit(int nr, const volatile unsigned long *addr);
*
* Undefined if no bit exists, so code should check against 0 first.
*/
-static inline unsigned long __ffs(unsigned long word)
+static __always_inline unsigned long __ffs(unsigned long word)
{
asm("rep; bsf %1,%0"
: "=r" (word)
@@ -357,7 +357,7 @@ static inline unsigned long __ffs(unsigned long word)
*
* Undefined if no zero exists, so code should check against ~0UL first.
*/
-static inline unsigned long ffz(unsigned long word)
+static __always_inline unsigned long ffz(unsigned long word)
{
asm("rep; bsf %1,%0"
: "=r" (word)
@@ -371,7 +371,7 @@ static inline unsigned long ffz(unsigned long word)
*
* Undefined if no set bit exists, so code should check against 0 first.
*/
-static inline unsigned long __fls(unsigned long word)
+static __always_inline unsigned long __fls(unsigned long word)
{
asm("bsr %1,%0"
: "=r" (word)
@@ -393,7 +393,7 @@ static inline unsigned long __fls(unsigned long word)
* set bit if value is nonzero. The first (least significant) bit
* is at position 1.
*/
-static inline int ffs(int x)
+static __always_inline int ffs(int x)
{
int r;
@@ -434,7 +434,7 @@ static inline int ffs(int x)
* set bit if value is nonzero. The last (most significant) bit is
* at position 32.
*/
-static inline int fls(int x)
+static __always_inline int fls(int x)
{
int r;
diff --git a/arch/x86/include/asm/cacheflush.h b/arch/x86/include/asm/cacheflush.h
index e63aa38e85fb2..61518cf794376 100644
--- a/arch/x86/include/asm/cacheflush.h
+++ b/arch/x86/include/asm/cacheflush.h
@@ -91,16 +91,10 @@ void clflush_cache_range(void *addr, unsigned int size);
#define mmio_flush_range(addr, size) clflush_cache_range(addr, size)
-#ifdef CONFIG_DEBUG_RODATA
-void mark_rodata_ro(void);
extern const int rodata_test_data;
extern int kernel_set_to_readonly;
void set_kernel_text_rw(void);
void set_kernel_text_ro(void);
-#else
-static inline void set_kernel_text_rw(void) { }
-static inline void set_kernel_text_ro(void) { }
-#endif
#ifdef CONFIG_DEBUG_RODATA_TEST
int rodata_test(void);
diff --git a/arch/x86/include/asm/clocksource.h b/arch/x86/include/asm/clocksource.h
index eda81dc0f4ae0..d194266acb28e 100644
--- a/arch/x86/include/asm/clocksource.h
+++ b/arch/x86/include/asm/clocksource.h
@@ -3,10 +3,11 @@
#ifndef _ASM_X86_CLOCKSOURCE_H
#define _ASM_X86_CLOCKSOURCE_H
-#define VCLOCK_NONE 0 /* No vDSO clock available. */
-#define VCLOCK_TSC 1 /* vDSO should use vread_tsc. */
-#define VCLOCK_HPET 2 /* vDSO should use vread_hpet. */
-#define VCLOCK_PVCLOCK 3 /* vDSO should use vread_pvclock. */
+#define VCLOCK_NONE 0 /* No vDSO clock available. */
+#define VCLOCK_TSC 1 /* vDSO should use vread_tsc. */
+#define VCLOCK_HPET 2 /* vDSO should use vread_hpet. */
+#define VCLOCK_PVCLOCK 3 /* vDSO should use vread_pvclock. */
+#define VCLOCK_MAX 3
struct arch_clocksource_data {
int vclock_mode;
diff --git a/arch/x86/include/asm/cmpxchg.h b/arch/x86/include/asm/cmpxchg.h
index ad19841eddfe1..9733361fed6f4 100644
--- a/arch/x86/include/asm/cmpxchg.h
+++ b/arch/x86/include/asm/cmpxchg.h
@@ -2,6 +2,7 @@
#define ASM_X86_CMPXCHG_H
#include
+#include
#include /* Provides LOCK_PREFIX */
/*
diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h
index 7ad8c94642977..68e4e8258b841 100644
--- a/arch/x86/include/asm/cpufeature.h
+++ b/arch/x86/include/asm/cpufeature.h
@@ -1,288 +1,7 @@
-/*
- * Defines x86 CPU feature bits
- */
#ifndef _ASM_X86_CPUFEATURE_H
#define _ASM_X86_CPUFEATURE_H
-#ifndef _ASM_X86_REQUIRED_FEATURES_H
-#include
-#endif
-
-#ifndef _ASM_X86_DISABLED_FEATURES_H
-#include
-#endif
-
-#define NCAPINTS 16 /* N 32-bit words worth of info */
-#define NBUGINTS 1 /* N 32-bit bug flags */
-
-/*
- * Note: If the comment begins with a quoted string, that string is used
- * in /proc/cpuinfo instead of the macro name. If the string is "",
- * this feature bit is not displayed in /proc/cpuinfo at all.
- */
-
-/* Intel-defined CPU features, CPUID level 0x00000001 (edx), word 0 */
-#define X86_FEATURE_FPU ( 0*32+ 0) /* Onboard FPU */
-#define X86_FEATURE_VME ( 0*32+ 1) /* Virtual Mode Extensions */
-#define X86_FEATURE_DE ( 0*32+ 2) /* Debugging Extensions */
-#define X86_FEATURE_PSE ( 0*32+ 3) /* Page Size Extensions */
-#define X86_FEATURE_TSC ( 0*32+ 4) /* Time Stamp Counter */
-#define X86_FEATURE_MSR ( 0*32+ 5) /* Model-Specific Registers */
-#define X86_FEATURE_PAE ( 0*32+ 6) /* Physical Address Extensions */
-#define X86_FEATURE_MCE ( 0*32+ 7) /* Machine Check Exception */
-#define X86_FEATURE_CX8 ( 0*32+ 8) /* CMPXCHG8 instruction */
-#define X86_FEATURE_APIC ( 0*32+ 9) /* Onboard APIC */
-#define X86_FEATURE_SEP ( 0*32+11) /* SYSENTER/SYSEXIT */
-#define X86_FEATURE_MTRR ( 0*32+12) /* Memory Type Range Registers */
-#define X86_FEATURE_PGE ( 0*32+13) /* Page Global Enable */
-#define X86_FEATURE_MCA ( 0*32+14) /* Machine Check Architecture */
-#define X86_FEATURE_CMOV ( 0*32+15) /* CMOV instructions */
- /* (plus FCMOVcc, FCOMI with FPU) */
-#define X86_FEATURE_PAT ( 0*32+16) /* Page Attribute Table */
-#define X86_FEATURE_PSE36 ( 0*32+17) /* 36-bit PSEs */
-#define X86_FEATURE_PN ( 0*32+18) /* Processor serial number */
-#define X86_FEATURE_CLFLUSH ( 0*32+19) /* CLFLUSH instruction */
-#define X86_FEATURE_DS ( 0*32+21) /* "dts" Debug Store */
-#define X86_FEATURE_ACPI ( 0*32+22) /* ACPI via MSR */
-#define X86_FEATURE_MMX ( 0*32+23) /* Multimedia Extensions */
-#define X86_FEATURE_FXSR ( 0*32+24) /* FXSAVE/FXRSTOR, CR4.OSFXSR */
-#define X86_FEATURE_XMM ( 0*32+25) /* "sse" */
-#define X86_FEATURE_XMM2 ( 0*32+26) /* "sse2" */
-#define X86_FEATURE_SELFSNOOP ( 0*32+27) /* "ss" CPU self snoop */
-#define X86_FEATURE_HT ( 0*32+28) /* Hyper-Threading */
-#define X86_FEATURE_ACC ( 0*32+29) /* "tm" Automatic clock control */
-#define X86_FEATURE_IA64 ( 0*32+30) /* IA-64 processor */
-#define X86_FEATURE_PBE ( 0*32+31) /* Pending Break Enable */
-
-/* AMD-defined CPU features, CPUID level 0x80000001, word 1 */
-/* Don't duplicate feature flags which are redundant with Intel! */
-#define X86_FEATURE_SYSCALL ( 1*32+11) /* SYSCALL/SYSRET */
-#define X86_FEATURE_MP ( 1*32+19) /* MP Capable. */
-#define X86_FEATURE_NX ( 1*32+20) /* Execute Disable */
-#define X86_FEATURE_MMXEXT ( 1*32+22) /* AMD MMX extensions */
-#define X86_FEATURE_FXSR_OPT ( 1*32+25) /* FXSAVE/FXRSTOR optimizations */
-#define X86_FEATURE_GBPAGES ( 1*32+26) /* "pdpe1gb" GB pages */
-#define X86_FEATURE_RDTSCP ( 1*32+27) /* RDTSCP */
-#define X86_FEATURE_LM ( 1*32+29) /* Long Mode (x86-64) */
-#define X86_FEATURE_3DNOWEXT ( 1*32+30) /* AMD 3DNow! extensions */
-#define X86_FEATURE_3DNOW ( 1*32+31) /* 3DNow! */
-
-/* Transmeta-defined CPU features, CPUID level 0x80860001, word 2 */
-#define X86_FEATURE_RECOVERY ( 2*32+ 0) /* CPU in recovery mode */
-#define X86_FEATURE_LONGRUN ( 2*32+ 1) /* Longrun power control */
-#define X86_FEATURE_LRTI ( 2*32+ 3) /* LongRun table interface */
-
-/* Other features, Linux-defined mapping, word 3 */
-/* This range is used for feature bits which conflict or are synthesized */
-#define X86_FEATURE_CXMMX ( 3*32+ 0) /* Cyrix MMX extensions */
-#define X86_FEATURE_K6_MTRR ( 3*32+ 1) /* AMD K6 nonstandard MTRRs */
-#define X86_FEATURE_CYRIX_ARR ( 3*32+ 2) /* Cyrix ARRs (= MTRRs) */
-#define X86_FEATURE_CENTAUR_MCR ( 3*32+ 3) /* Centaur MCRs (= MTRRs) */
-/* cpu types for specific tunings: */
-#define X86_FEATURE_K8 ( 3*32+ 4) /* "" Opteron, Athlon64 */
-#define X86_FEATURE_K7 ( 3*32+ 5) /* "" Athlon */
-#define X86_FEATURE_P3 ( 3*32+ 6) /* "" P3 */
-#define X86_FEATURE_P4 ( 3*32+ 7) /* "" P4 */
-#define X86_FEATURE_CONSTANT_TSC ( 3*32+ 8) /* TSC ticks at a constant rate */
-#define X86_FEATURE_UP ( 3*32+ 9) /* smp kernel running on up */
-/* free, was #define X86_FEATURE_FXSAVE_LEAK ( 3*32+10) * "" FXSAVE leaks FOP/FIP/FOP */
-#define X86_FEATURE_ARCH_PERFMON ( 3*32+11) /* Intel Architectural PerfMon */
-#define X86_FEATURE_PEBS ( 3*32+12) /* Precise-Event Based Sampling */
-#define X86_FEATURE_BTS ( 3*32+13) /* Branch Trace Store */
-#define X86_FEATURE_SYSCALL32 ( 3*32+14) /* "" syscall in ia32 userspace */
-#define X86_FEATURE_SYSENTER32 ( 3*32+15) /* "" sysenter in ia32 userspace */
-#define X86_FEATURE_REP_GOOD ( 3*32+16) /* rep microcode works well */
-#define X86_FEATURE_MFENCE_RDTSC ( 3*32+17) /* "" Mfence synchronizes RDTSC */
-#define X86_FEATURE_LFENCE_RDTSC ( 3*32+18) /* "" Lfence synchronizes RDTSC */
-/* free, was #define X86_FEATURE_11AP ( 3*32+19) * "" Bad local APIC aka 11AP */
-#define X86_FEATURE_NOPL ( 3*32+20) /* The NOPL (0F 1F) instructions */
-#define X86_FEATURE_ALWAYS ( 3*32+21) /* "" Always-present feature */
-#define X86_FEATURE_XTOPOLOGY ( 3*32+22) /* cpu topology enum extensions */
-#define X86_FEATURE_TSC_RELIABLE ( 3*32+23) /* TSC is known to be reliable */
-#define X86_FEATURE_NONSTOP_TSC ( 3*32+24) /* TSC does not stop in C states */
-/* free, was #define X86_FEATURE_CLFLUSH_MONITOR ( 3*32+25) * "" clflush reqd with monitor */
-#define X86_FEATURE_EXTD_APICID ( 3*32+26) /* has extended APICID (8 bits) */
-#define X86_FEATURE_AMD_DCM ( 3*32+27) /* multi-node processor */
-#define X86_FEATURE_APERFMPERF ( 3*32+28) /* APERFMPERF */
-#define X86_FEATURE_EAGER_FPU ( 3*32+29) /* "eagerfpu" Non lazy FPU restore */
-#define X86_FEATURE_NONSTOP_TSC_S3 ( 3*32+30) /* TSC doesn't stop in S3 state */
-
-/* Intel-defined CPU features, CPUID level 0x00000001 (ecx), word 4 */
-#define X86_FEATURE_XMM3 ( 4*32+ 0) /* "pni" SSE-3 */
-#define X86_FEATURE_PCLMULQDQ ( 4*32+ 1) /* PCLMULQDQ instruction */
-#define X86_FEATURE_DTES64 ( 4*32+ 2) /* 64-bit Debug Store */
-#define X86_FEATURE_MWAIT ( 4*32+ 3) /* "monitor" Monitor/Mwait support */
-#define X86_FEATURE_DSCPL ( 4*32+ 4) /* "ds_cpl" CPL Qual. Debug Store */
-#define X86_FEATURE_VMX ( 4*32+ 5) /* Hardware virtualization */
-#define X86_FEATURE_SMX ( 4*32+ 6) /* Safer mode */
-#define X86_FEATURE_EST ( 4*32+ 7) /* Enhanced SpeedStep */
-#define X86_FEATURE_TM2 ( 4*32+ 8) /* Thermal Monitor 2 */
-#define X86_FEATURE_SSSE3 ( 4*32+ 9) /* Supplemental SSE-3 */
-#define X86_FEATURE_CID ( 4*32+10) /* Context ID */
-#define X86_FEATURE_SDBG ( 4*32+11) /* Silicon Debug */
-#define X86_FEATURE_FMA ( 4*32+12) /* Fused multiply-add */
-#define X86_FEATURE_CX16 ( 4*32+13) /* CMPXCHG16B */
-#define X86_FEATURE_XTPR ( 4*32+14) /* Send Task Priority Messages */
-#define X86_FEATURE_PDCM ( 4*32+15) /* Performance Capabilities */
-#define X86_FEATURE_PCID ( 4*32+17) /* Process Context Identifiers */
-#define X86_FEATURE_DCA ( 4*32+18) /* Direct Cache Access */
-#define X86_FEATURE_XMM4_1 ( 4*32+19) /* "sse4_1" SSE-4.1 */
-#define X86_FEATURE_XMM4_2 ( 4*32+20) /* "sse4_2" SSE-4.2 */
-#define X86_FEATURE_X2APIC ( 4*32+21) /* x2APIC */
-#define X86_FEATURE_MOVBE ( 4*32+22) /* MOVBE instruction */
-#define X86_FEATURE_POPCNT ( 4*32+23) /* POPCNT instruction */
-#define X86_FEATURE_TSC_DEADLINE_TIMER ( 4*32+24) /* Tsc deadline timer */
-#define X86_FEATURE_AES ( 4*32+25) /* AES instructions */
-#define X86_FEATURE_XSAVE ( 4*32+26) /* XSAVE/XRSTOR/XSETBV/XGETBV */
-#define X86_FEATURE_OSXSAVE ( 4*32+27) /* "" XSAVE enabled in the OS */
-#define X86_FEATURE_AVX ( 4*32+28) /* Advanced Vector Extensions */
-#define X86_FEATURE_F16C ( 4*32+29) /* 16-bit fp conversions */
-#define X86_FEATURE_RDRAND ( 4*32+30) /* The RDRAND instruction */
-#define X86_FEATURE_HYPERVISOR ( 4*32+31) /* Running on a hypervisor */
-
-/* VIA/Cyrix/Centaur-defined CPU features, CPUID level 0xC0000001, word 5 */
-#define X86_FEATURE_XSTORE ( 5*32+ 2) /* "rng" RNG present (xstore) */
-#define X86_FEATURE_XSTORE_EN ( 5*32+ 3) /* "rng_en" RNG enabled */
-#define X86_FEATURE_XCRYPT ( 5*32+ 6) /* "ace" on-CPU crypto (xcrypt) */
-#define X86_FEATURE_XCRYPT_EN ( 5*32+ 7) /* "ace_en" on-CPU crypto enabled */
-#define X86_FEATURE_ACE2 ( 5*32+ 8) /* Advanced Cryptography Engine v2 */
-#define X86_FEATURE_ACE2_EN ( 5*32+ 9) /* ACE v2 enabled */
-#define X86_FEATURE_PHE ( 5*32+10) /* PadLock Hash Engine */
-#define X86_FEATURE_PHE_EN ( 5*32+11) /* PHE enabled */
-#define X86_FEATURE_PMM ( 5*32+12) /* PadLock Montgomery Multiplier */
-#define X86_FEATURE_PMM_EN ( 5*32+13) /* PMM enabled */
-
-/* More extended AMD flags: CPUID level 0x80000001, ecx, word 6 */
-#define X86_FEATURE_LAHF_LM ( 6*32+ 0) /* LAHF/SAHF in long mode */
-#define X86_FEATURE_CMP_LEGACY ( 6*32+ 1) /* If yes HyperThreading not valid */
-#define X86_FEATURE_SVM ( 6*32+ 2) /* Secure virtual machine */
-#define X86_FEATURE_EXTAPIC ( 6*32+ 3) /* Extended APIC space */
-#define X86_FEATURE_CR8_LEGACY ( 6*32+ 4) /* CR8 in 32-bit mode */
-#define X86_FEATURE_ABM ( 6*32+ 5) /* Advanced bit manipulation */
-#define X86_FEATURE_SSE4A ( 6*32+ 6) /* SSE-4A */
-#define X86_FEATURE_MISALIGNSSE ( 6*32+ 7) /* Misaligned SSE mode */
-#define X86_FEATURE_3DNOWPREFETCH ( 6*32+ 8) /* 3DNow prefetch instructions */
-#define X86_FEATURE_OSVW ( 6*32+ 9) /* OS Visible Workaround */
-#define X86_FEATURE_IBS ( 6*32+10) /* Instruction Based Sampling */
-#define X86_FEATURE_XOP ( 6*32+11) /* extended AVX instructions */
-#define X86_FEATURE_SKINIT ( 6*32+12) /* SKINIT/STGI instructions */
-#define X86_FEATURE_WDT ( 6*32+13) /* Watchdog timer */
-#define X86_FEATURE_LWP ( 6*32+15) /* Light Weight Profiling */
-#define X86_FEATURE_FMA4 ( 6*32+16) /* 4 operands MAC instructions */
-#define X86_FEATURE_TCE ( 6*32+17) /* translation cache extension */
-#define X86_FEATURE_NODEID_MSR ( 6*32+19) /* NodeId MSR */
-#define X86_FEATURE_TBM ( 6*32+21) /* trailing bit manipulations */
-#define X86_FEATURE_TOPOEXT ( 6*32+22) /* topology extensions CPUID leafs */
-#define X86_FEATURE_PERFCTR_CORE ( 6*32+23) /* core performance counter extensions */
-#define X86_FEATURE_PERFCTR_NB ( 6*32+24) /* NB performance counter extensions */
-#define X86_FEATURE_BPEXT (6*32+26) /* data breakpoint extension */
-#define X86_FEATURE_PERFCTR_L2 ( 6*32+28) /* L2 performance counter extensions */
-#define X86_FEATURE_MWAITX ( 6*32+29) /* MWAIT extension (MONITORX/MWAITX) */
-
-/*
- * Auxiliary flags: Linux defined - For features scattered in various
- * CPUID levels like 0x6, 0xA etc, word 7.
- *
- * Reuse free bits when adding new feature flags!
- */
-
-#define X86_FEATURE_CPB ( 7*32+ 2) /* AMD Core Performance Boost */
-#define X86_FEATURE_EPB ( 7*32+ 3) /* IA32_ENERGY_PERF_BIAS support */
-
-#define X86_FEATURE_HW_PSTATE ( 7*32+ 8) /* AMD HW-PState */
-#define X86_FEATURE_PROC_FEEDBACK ( 7*32+ 9) /* AMD ProcFeedbackInterface */
-
-#define X86_FEATURE_INTEL_PT ( 7*32+15) /* Intel Processor Trace */
-
-/* Virtualization flags: Linux defined, word 8 */
-#define X86_FEATURE_TPR_SHADOW ( 8*32+ 0) /* Intel TPR Shadow */
-#define X86_FEATURE_VNMI ( 8*32+ 1) /* Intel Virtual NMI */
-#define X86_FEATURE_FLEXPRIORITY ( 8*32+ 2) /* Intel FlexPriority */
-#define X86_FEATURE_EPT ( 8*32+ 3) /* Intel Extended Page Table */
-#define X86_FEATURE_VPID ( 8*32+ 4) /* Intel Virtual Processor ID */
-
-#define X86_FEATURE_VMMCALL ( 8*32+15) /* Prefer vmmcall to vmcall */
-#define X86_FEATURE_XENPV ( 8*32+16) /* "" Xen paravirtual guest */
-
-
-/* Intel-defined CPU features, CPUID level 0x00000007:0 (ebx), word 9 */
-#define X86_FEATURE_FSGSBASE ( 9*32+ 0) /* {RD/WR}{FS/GS}BASE instructions*/
-#define X86_FEATURE_TSC_ADJUST ( 9*32+ 1) /* TSC adjustment MSR 0x3b */
-#define X86_FEATURE_BMI1 ( 9*32+ 3) /* 1st group bit manipulation extensions */
-#define X86_FEATURE_HLE ( 9*32+ 4) /* Hardware Lock Elision */
-#define X86_FEATURE_AVX2 ( 9*32+ 5) /* AVX2 instructions */
-#define X86_FEATURE_SMEP ( 9*32+ 7) /* Supervisor Mode Execution Protection */
-#define X86_FEATURE_BMI2 ( 9*32+ 8) /* 2nd group bit manipulation extensions */
-#define X86_FEATURE_ERMS ( 9*32+ 9) /* Enhanced REP MOVSB/STOSB */
-#define X86_FEATURE_INVPCID ( 9*32+10) /* Invalidate Processor Context ID */
-#define X86_FEATURE_RTM ( 9*32+11) /* Restricted Transactional Memory */
-#define X86_FEATURE_CQM ( 9*32+12) /* Cache QoS Monitoring */
-#define X86_FEATURE_MPX ( 9*32+14) /* Memory Protection Extension */
-#define X86_FEATURE_AVX512F ( 9*32+16) /* AVX-512 Foundation */
-#define X86_FEATURE_RDSEED ( 9*32+18) /* The RDSEED instruction */
-#define X86_FEATURE_ADX ( 9*32+19) /* The ADCX and ADOX instructions */
-#define X86_FEATURE_SMAP ( 9*32+20) /* Supervisor Mode Access Prevention */
-#define X86_FEATURE_PCOMMIT ( 9*32+22) /* PCOMMIT instruction */
-#define X86_FEATURE_CLFLUSHOPT ( 9*32+23) /* CLFLUSHOPT instruction */
-#define X86_FEATURE_CLWB ( 9*32+24) /* CLWB instruction */
-#define X86_FEATURE_AVX512PF ( 9*32+26) /* AVX-512 Prefetch */
-#define X86_FEATURE_AVX512ER ( 9*32+27) /* AVX-512 Exponential and Reciprocal */
-#define X86_FEATURE_AVX512CD ( 9*32+28) /* AVX-512 Conflict Detection */
-#define X86_FEATURE_SHA_NI ( 9*32+29) /* SHA1/SHA256 Instruction Extensions */
-
-/* Extended state features, CPUID level 0x0000000d:1 (eax), word 10 */
-#define X86_FEATURE_XSAVEOPT (10*32+ 0) /* XSAVEOPT */
-#define X86_FEATURE_XSAVEC (10*32+ 1) /* XSAVEC */
-#define X86_FEATURE_XGETBV1 (10*32+ 2) /* XGETBV with ECX = 1 */
-#define X86_FEATURE_XSAVES (10*32+ 3) /* XSAVES/XRSTORS */
-
-/* Intel-defined CPU QoS Sub-leaf, CPUID level 0x0000000F:0 (edx), word 11 */
-#define X86_FEATURE_CQM_LLC (11*32+ 1) /* LLC QoS if 1 */
-
-/* Intel-defined CPU QoS Sub-leaf, CPUID level 0x0000000F:1 (edx), word 12 */
-#define X86_FEATURE_CQM_OCCUP_LLC (12*32+ 0) /* LLC occupancy monitoring if 1 */
-
-/* AMD-defined CPU features, CPUID level 0x80000008 (ebx), word 13 */
-#define X86_FEATURE_CLZERO (13*32+0) /* CLZERO instruction */
-
-/* Thermal and Power Management Leaf, CPUID level 0x00000006 (eax), word 14 */
-#define X86_FEATURE_DTHERM (14*32+ 0) /* Digital Thermal Sensor */
-#define X86_FEATURE_IDA (14*32+ 1) /* Intel Dynamic Acceleration */
-#define X86_FEATURE_ARAT (14*32+ 2) /* Always Running APIC Timer */
-#define X86_FEATURE_PLN (14*32+ 4) /* Intel Power Limit Notification */
-#define X86_FEATURE_PTS (14*32+ 6) /* Intel Package Thermal Status */
-#define X86_FEATURE_HWP (14*32+ 7) /* Intel Hardware P-states */
-#define X86_FEATURE_HWP_NOTIFY (14*32+ 8) /* HWP Notification */
-#define X86_FEATURE_HWP_ACT_WINDOW (14*32+ 9) /* HWP Activity Window */
-#define X86_FEATURE_HWP_EPP (14*32+10) /* HWP Energy Perf. Preference */
-#define X86_FEATURE_HWP_PKG_REQ (14*32+11) /* HWP Package Level Request */
-
-/* AMD SVM Feature Identification, CPUID level 0x8000000a (edx), word 15 */
-#define X86_FEATURE_NPT (15*32+ 0) /* Nested Page Table support */
-#define X86_FEATURE_LBRV (15*32+ 1) /* LBR Virtualization support */
-#define X86_FEATURE_SVML (15*32+ 2) /* "svm_lock" SVM locking MSR */
-#define X86_FEATURE_NRIPS (15*32+ 3) /* "nrip_save" SVM next_rip save */
-#define X86_FEATURE_TSCRATEMSR (15*32+ 4) /* "tsc_scale" TSC scaling support */
-#define X86_FEATURE_VMCBCLEAN (15*32+ 5) /* "vmcb_clean" VMCB clean bits support */
-#define X86_FEATURE_FLUSHBYASID (15*32+ 6) /* flush-by-ASID support */
-#define X86_FEATURE_DECODEASSISTS (15*32+ 7) /* Decode Assists support */
-#define X86_FEATURE_PAUSEFILTER (15*32+10) /* filtered pause intercept */
-#define X86_FEATURE_PFTHRESHOLD (15*32+12) /* pause filter threshold */
-
-/*
- * BUG word(s)
- */
-#define X86_BUG(x) (NCAPINTS*32 + (x))
-
-#define X86_BUG_F00F X86_BUG(0) /* Intel F00F */
-#define X86_BUG_FDIV X86_BUG(1) /* FPU FDIV */
-#define X86_BUG_COMA X86_BUG(2) /* Cyrix 6x86 coma */
-#define X86_BUG_AMD_TLB_MMATCH X86_BUG(3) /* "tlb_mmatch" AMD Erratum 383 */
-#define X86_BUG_AMD_APIC_C1E X86_BUG(4) /* "apic_c1e" AMD Erratum 400 */
-#define X86_BUG_11AP X86_BUG(5) /* Bad local APIC aka 11AP */
-#define X86_BUG_FXSAVE_LEAK X86_BUG(6) /* FXSAVE leaks FOP/FIP/FOP */
-#define X86_BUG_CLFLUSH_MONITOR X86_BUG(7) /* AAI65, CLFLUSH required before MONITOR */
-#define X86_BUG_SYSRET_SS_ATTRS X86_BUG(8) /* SYSRET doesn't fix up SS attrs */
+#include
#if defined(__KERNEL__) && !defined(__ASSEMBLY__)
@@ -369,8 +88,7 @@ extern const char * const x86_bug_flags[NBUGINTS*32];
* is not relevant.
*/
#define cpu_feature_enabled(bit) \
- (__builtin_constant_p(bit) && DISABLED_MASK_BIT_SET(bit) ? 0 : \
- cpu_has(&boot_cpu_data, bit))
+ (__builtin_constant_p(bit) && DISABLED_MASK_BIT_SET(bit) ? 0 : static_cpu_has(bit))
#define boot_cpu_has(bit) cpu_has(&boot_cpu_data, bit)
@@ -406,106 +124,19 @@ extern const char * const x86_bug_flags[NBUGINTS*32];
#define cpu_has_osxsave boot_cpu_has(X86_FEATURE_OSXSAVE)
#define cpu_has_hypervisor boot_cpu_has(X86_FEATURE_HYPERVISOR)
/*
- * Do not add any more of those clumsy macros - use static_cpu_has_safe() for
+ * Do not add any more of those clumsy macros - use static_cpu_has() for
* fast paths and boot_cpu_has() otherwise!
*/
-#if __GNUC__ >= 4 && defined(CONFIG_X86_FAST_FEATURE_TESTS)
-extern void warn_pre_alternatives(void);
-extern bool __static_cpu_has_safe(u16 bit);
-
+#if defined(CC_HAVE_ASM_GOTO) && defined(CONFIG_X86_FAST_FEATURE_TESTS)
/*
* Static testing of CPU features. Used the same as boot_cpu_has().
- * These are only valid after alternatives have run, but will statically
- * patch the target code for additional performance.
+ * These will statically patch the target code for additional
+ * performance.
*/
-static __always_inline __pure bool __static_cpu_has(u16 bit)
-{
-#ifdef CC_HAVE_ASM_GOTO
-
-#ifdef CONFIG_X86_DEBUG_STATIC_CPU_HAS
-
- /*
- * Catch too early usage of this before alternatives
- * have run.
- */
- asm_volatile_goto("1: jmp %l[t_warn]\n"
- "2:\n"
- ".section .altinstructions,\"a\"\n"
- " .long 1b - .\n"
- " .long 0\n" /* no replacement */
- " .word %P0\n" /* 1: do replace */
- " .byte 2b - 1b\n" /* source len */
- " .byte 0\n" /* replacement len */
- " .byte 0\n" /* pad len */
- ".previous\n"
- /* skipping size check since replacement size = 0 */
- : : "i" (X86_FEATURE_ALWAYS) : : t_warn);
-
-#endif
-
- asm_volatile_goto("1: jmp %l[t_no]\n"
- "2:\n"
- ".section .altinstructions,\"a\"\n"
- " .long 1b - .\n"
- " .long 0\n" /* no replacement */
- " .word %P0\n" /* feature bit */
- " .byte 2b - 1b\n" /* source len */
- " .byte 0\n" /* replacement len */
- " .byte 0\n" /* pad len */
- ".previous\n"
- /* skipping size check since replacement size = 0 */
- : : "i" (bit) : : t_no);
- return true;
- t_no:
- return false;
-
-#ifdef CONFIG_X86_DEBUG_STATIC_CPU_HAS
- t_warn:
- warn_pre_alternatives();
- return false;
-#endif
-
-#else /* CC_HAVE_ASM_GOTO */
-
- u8 flag;
- /* Open-coded due to __stringify() in ALTERNATIVE() */
- asm volatile("1: movb $0,%0\n"
- "2:\n"
- ".section .altinstructions,\"a\"\n"
- " .long 1b - .\n"
- " .long 3f - .\n"
- " .word %P1\n" /* feature bit */
- " .byte 2b - 1b\n" /* source len */
- " .byte 4f - 3f\n" /* replacement len */
- " .byte 0\n" /* pad len */
- ".previous\n"
- ".section .discard,\"aw\",@progbits\n"
- " .byte 0xff + (4f-3f) - (2b-1b)\n" /* size check */
- ".previous\n"
- ".section .altinstr_replacement,\"ax\"\n"
- "3: movb $1,%0\n"
- "4:\n"
- ".previous\n"
- : "=qm" (flag) : "i" (bit));
- return flag;
-
-#endif /* CC_HAVE_ASM_GOTO */
-}
-
-#define static_cpu_has(bit) \
-( \
- __builtin_constant_p(boot_cpu_has(bit)) ? \
- boot_cpu_has(bit) : \
- __builtin_constant_p(bit) ? \
- __static_cpu_has(bit) : \
- boot_cpu_has(bit) \
-)
-
-static __always_inline __pure bool _static_cpu_has_safe(u16 bit)
+static __always_inline __pure bool _static_cpu_has(u16 bit)
{
-#ifdef CC_HAVE_ASM_GOTO
- asm_volatile_goto("1: jmp %l[t_dynamic]\n"
+ asm_volatile_goto("1: jmp 6f\n"
"2:\n"
".skip -(((5f-4f) - (2b-1b)) > 0) * "
"((5f-4f) - (2b-1b)),0x90\n"
@@ -530,66 +161,34 @@ static __always_inline __pure bool _static_cpu_has_safe(u16 bit)
" .byte 0\n" /* repl len */
" .byte 0\n" /* pad len */
".previous\n"
- : : "i" (bit), "i" (X86_FEATURE_ALWAYS)
- : : t_dynamic, t_no);
+ ".section .altinstr_aux,\"ax\"\n"
+ "6:\n"
+ " testb %[bitnum],%[cap_byte]\n"
+ " jnz %l[t_yes]\n"
+ " jmp %l[t_no]\n"
+ ".previous\n"
+ : : "i" (bit), "i" (X86_FEATURE_ALWAYS),
+ [bitnum] "i" (1 << (bit & 7)),
+ [cap_byte] "m" (((const char *)boot_cpu_data.x86_capability)[bit >> 3])
+ : : t_yes, t_no);
+ t_yes:
return true;
t_no:
return false;
- t_dynamic:
- return __static_cpu_has_safe(bit);
-#else
- u8 flag;
- /* Open-coded due to __stringify() in ALTERNATIVE() */
- asm volatile("1: movb $2,%0\n"
- "2:\n"
- ".section .altinstructions,\"a\"\n"
- " .long 1b - .\n" /* src offset */
- " .long 3f - .\n" /* repl offset */
- " .word %P2\n" /* always replace */
- " .byte 2b - 1b\n" /* source len */
- " .byte 4f - 3f\n" /* replacement len */
- " .byte 0\n" /* pad len */
- ".previous\n"
- ".section .discard,\"aw\",@progbits\n"
- " .byte 0xff + (4f-3f) - (2b-1b)\n" /* size check */
- ".previous\n"
- ".section .altinstr_replacement,\"ax\"\n"
- "3: movb $0,%0\n"
- "4:\n"
- ".previous\n"
- ".section .altinstructions,\"a\"\n"
- " .long 1b - .\n" /* src offset */
- " .long 5f - .\n" /* repl offset */
- " .word %P1\n" /* feature bit */
- " .byte 4b - 3b\n" /* src len */
- " .byte 6f - 5f\n" /* repl len */
- " .byte 0\n" /* pad len */
- ".previous\n"
- ".section .discard,\"aw\",@progbits\n"
- " .byte 0xff + (6f-5f) - (4b-3b)\n" /* size check */
- ".previous\n"
- ".section .altinstr_replacement,\"ax\"\n"
- "5: movb $1,%0\n"
- "6:\n"
- ".previous\n"
- : "=qm" (flag)
- : "i" (bit), "i" (X86_FEATURE_ALWAYS));
- return (flag == 2 ? __static_cpu_has_safe(bit) : flag);
-#endif /* CC_HAVE_ASM_GOTO */
}
-#define static_cpu_has_safe(bit) \
+#define static_cpu_has(bit) \
( \
__builtin_constant_p(boot_cpu_has(bit)) ? \
boot_cpu_has(bit) : \
- _static_cpu_has_safe(bit) \
+ _static_cpu_has(bit) \
)
#else
/*
- * gcc 3.x is too stupid to do the static test; fall back to dynamic.
+ * Fall back to dynamic for gcc versions which don't support asm goto. Should be
+ * a minority now anyway.
*/
#define static_cpu_has(bit) boot_cpu_has(bit)
-#define static_cpu_has_safe(bit) boot_cpu_has(bit)
#endif
#define cpu_has_bug(c, bit) cpu_has(c, (bit))
@@ -597,7 +196,6 @@ static __always_inline __pure bool _static_cpu_has_safe(u16 bit)
#define clear_cpu_bug(c, bit) clear_cpu_cap(c, (bit))
#define static_cpu_has_bug(bit) static_cpu_has((bit))
-#define static_cpu_has_bug_safe(bit) static_cpu_has_safe((bit))
#define boot_cpu_has_bug(bit) cpu_has_bug(&boot_cpu_data, (bit))
#define MAX_CPU_FEATURES (NCAPINTS * 32)
diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h
new file mode 100644
index 0000000000000..074b7604bd512
--- /dev/null
+++ b/arch/x86/include/asm/cpufeatures.h
@@ -0,0 +1,300 @@
+#ifndef _ASM_X86_CPUFEATURES_H
+#define _ASM_X86_CPUFEATURES_H
+
+#ifndef _ASM_X86_REQUIRED_FEATURES_H
+#include
+#endif
+
+#ifndef _ASM_X86_DISABLED_FEATURES_H
+#include
+#endif
+
+/*
+ * Defines x86 CPU feature bits
+ */
+#define NCAPINTS 16 /* N 32-bit words worth of info */
+#define NBUGINTS 1 /* N 32-bit bug flags */
+
+/*
+ * Note: If the comment begins with a quoted string, that string is used
+ * in /proc/cpuinfo instead of the macro name. If the string is "",
+ * this feature bit is not displayed in /proc/cpuinfo at all.
+ */
+
+/* Intel-defined CPU features, CPUID level 0x00000001 (edx), word 0 */
+#define X86_FEATURE_FPU ( 0*32+ 0) /* Onboard FPU */
+#define X86_FEATURE_VME ( 0*32+ 1) /* Virtual Mode Extensions */
+#define X86_FEATURE_DE ( 0*32+ 2) /* Debugging Extensions */
+#define X86_FEATURE_PSE ( 0*32+ 3) /* Page Size Extensions */
+#define X86_FEATURE_TSC ( 0*32+ 4) /* Time Stamp Counter */
+#define X86_FEATURE_MSR ( 0*32+ 5) /* Model-Specific Registers */
+#define X86_FEATURE_PAE ( 0*32+ 6) /* Physical Address Extensions */
+#define X86_FEATURE_MCE ( 0*32+ 7) /* Machine Check Exception */
+#define X86_FEATURE_CX8 ( 0*32+ 8) /* CMPXCHG8 instruction */
+#define X86_FEATURE_APIC ( 0*32+ 9) /* Onboard APIC */
+#define X86_FEATURE_SEP ( 0*32+11) /* SYSENTER/SYSEXIT */
+#define X86_FEATURE_MTRR ( 0*32+12) /* Memory Type Range Registers */
+#define X86_FEATURE_PGE ( 0*32+13) /* Page Global Enable */
+#define X86_FEATURE_MCA ( 0*32+14) /* Machine Check Architecture */
+#define X86_FEATURE_CMOV ( 0*32+15) /* CMOV instructions */
+ /* (plus FCMOVcc, FCOMI with FPU) */
+#define X86_FEATURE_PAT ( 0*32+16) /* Page Attribute Table */
+#define X86_FEATURE_PSE36 ( 0*32+17) /* 36-bit PSEs */
+#define X86_FEATURE_PN ( 0*32+18) /* Processor serial number */
+#define X86_FEATURE_CLFLUSH ( 0*32+19) /* CLFLUSH instruction */
+#define X86_FEATURE_DS ( 0*32+21) /* "dts" Debug Store */
+#define X86_FEATURE_ACPI ( 0*32+22) /* ACPI via MSR */
+#define X86_FEATURE_MMX ( 0*32+23) /* Multimedia Extensions */
+#define X86_FEATURE_FXSR ( 0*32+24) /* FXSAVE/FXRSTOR, CR4.OSFXSR */
+#define X86_FEATURE_XMM ( 0*32+25) /* "sse" */
+#define X86_FEATURE_XMM2 ( 0*32+26) /* "sse2" */
+#define X86_FEATURE_SELFSNOOP ( 0*32+27) /* "ss" CPU self snoop */
+#define X86_FEATURE_HT ( 0*32+28) /* Hyper-Threading */
+#define X86_FEATURE_ACC ( 0*32+29) /* "tm" Automatic clock control */
+#define X86_FEATURE_IA64 ( 0*32+30) /* IA-64 processor */
+#define X86_FEATURE_PBE ( 0*32+31) /* Pending Break Enable */
+
+/* AMD-defined CPU features, CPUID level 0x80000001, word 1 */
+/* Don't duplicate feature flags which are redundant with Intel! */
+#define X86_FEATURE_SYSCALL ( 1*32+11) /* SYSCALL/SYSRET */
+#define X86_FEATURE_MP ( 1*32+19) /* MP Capable. */
+#define X86_FEATURE_NX ( 1*32+20) /* Execute Disable */
+#define X86_FEATURE_MMXEXT ( 1*32+22) /* AMD MMX extensions */
+#define X86_FEATURE_FXSR_OPT ( 1*32+25) /* FXSAVE/FXRSTOR optimizations */
+#define X86_FEATURE_GBPAGES ( 1*32+26) /* "pdpe1gb" GB pages */
+#define X86_FEATURE_RDTSCP ( 1*32+27) /* RDTSCP */
+#define X86_FEATURE_LM ( 1*32+29) /* Long Mode (x86-64) */
+#define X86_FEATURE_3DNOWEXT ( 1*32+30) /* AMD 3DNow! extensions */
+#define X86_FEATURE_3DNOW ( 1*32+31) /* 3DNow! */
+
+/* Transmeta-defined CPU features, CPUID level 0x80860001, word 2 */
+#define X86_FEATURE_RECOVERY ( 2*32+ 0) /* CPU in recovery mode */
+#define X86_FEATURE_LONGRUN ( 2*32+ 1) /* Longrun power control */
+#define X86_FEATURE_LRTI ( 2*32+ 3) /* LongRun table interface */
+
+/* Other features, Linux-defined mapping, word 3 */
+/* This range is used for feature bits which conflict or are synthesized */
+#define X86_FEATURE_CXMMX ( 3*32+ 0) /* Cyrix MMX extensions */
+#define X86_FEATURE_K6_MTRR ( 3*32+ 1) /* AMD K6 nonstandard MTRRs */
+#define X86_FEATURE_CYRIX_ARR ( 3*32+ 2) /* Cyrix ARRs (= MTRRs) */
+#define X86_FEATURE_CENTAUR_MCR ( 3*32+ 3) /* Centaur MCRs (= MTRRs) */
+/* cpu types for specific tunings: */
+#define X86_FEATURE_K8 ( 3*32+ 4) /* "" Opteron, Athlon64 */
+#define X86_FEATURE_K7 ( 3*32+ 5) /* "" Athlon */
+#define X86_FEATURE_P3 ( 3*32+ 6) /* "" P3 */
+#define X86_FEATURE_P4 ( 3*32+ 7) /* "" P4 */
+#define X86_FEATURE_CONSTANT_TSC ( 3*32+ 8) /* TSC ticks at a constant rate */
+#define X86_FEATURE_UP ( 3*32+ 9) /* smp kernel running on up */
+#define X86_FEATURE_ART ( 3*32+10) /* Platform has always running timer (ART) */
+#define X86_FEATURE_ARCH_PERFMON ( 3*32+11) /* Intel Architectural PerfMon */
+#define X86_FEATURE_PEBS ( 3*32+12) /* Precise-Event Based Sampling */
+#define X86_FEATURE_BTS ( 3*32+13) /* Branch Trace Store */
+#define X86_FEATURE_SYSCALL32 ( 3*32+14) /* "" syscall in ia32 userspace */
+#define X86_FEATURE_SYSENTER32 ( 3*32+15) /* "" sysenter in ia32 userspace */
+#define X86_FEATURE_REP_GOOD ( 3*32+16) /* rep microcode works well */
+#define X86_FEATURE_MFENCE_RDTSC ( 3*32+17) /* "" Mfence synchronizes RDTSC */
+#define X86_FEATURE_LFENCE_RDTSC ( 3*32+18) /* "" Lfence synchronizes RDTSC */
+/* free, was #define X86_FEATURE_11AP ( 3*32+19) * "" Bad local APIC aka 11AP */
+#define X86_FEATURE_NOPL ( 3*32+20) /* The NOPL (0F 1F) instructions */
+#define X86_FEATURE_ALWAYS ( 3*32+21) /* "" Always-present feature */
+#define X86_FEATURE_XTOPOLOGY ( 3*32+22) /* cpu topology enum extensions */
+#define X86_FEATURE_TSC_RELIABLE ( 3*32+23) /* TSC is known to be reliable */
+#define X86_FEATURE_NONSTOP_TSC ( 3*32+24) /* TSC does not stop in C states */
+/* free, was #define X86_FEATURE_CLFLUSH_MONITOR ( 3*32+25) * "" clflush reqd with monitor */
+#define X86_FEATURE_EXTD_APICID ( 3*32+26) /* has extended APICID (8 bits) */
+#define X86_FEATURE_AMD_DCM ( 3*32+27) /* multi-node processor */
+#define X86_FEATURE_APERFMPERF ( 3*32+28) /* APERFMPERF */
+#define X86_FEATURE_EAGER_FPU ( 3*32+29) /* "eagerfpu" Non lazy FPU restore */
+#define X86_FEATURE_NONSTOP_TSC_S3 ( 3*32+30) /* TSC doesn't stop in S3 state */
+#define X86_FEATURE_MCE_RECOVERY ( 3*32+31) /* cpu has recoverable machine checks */
+
+/* Intel-defined CPU features, CPUID level 0x00000001 (ecx), word 4 */
+#define X86_FEATURE_XMM3 ( 4*32+ 0) /* "pni" SSE-3 */
+#define X86_FEATURE_PCLMULQDQ ( 4*32+ 1) /* PCLMULQDQ instruction */
+#define X86_FEATURE_DTES64 ( 4*32+ 2) /* 64-bit Debug Store */
+#define X86_FEATURE_MWAIT ( 4*32+ 3) /* "monitor" Monitor/Mwait support */
+#define X86_FEATURE_DSCPL ( 4*32+ 4) /* "ds_cpl" CPL Qual. Debug Store */
+#define X86_FEATURE_VMX ( 4*32+ 5) /* Hardware virtualization */
+#define X86_FEATURE_SMX ( 4*32+ 6) /* Safer mode */
+#define X86_FEATURE_EST ( 4*32+ 7) /* Enhanced SpeedStep */
+#define X86_FEATURE_TM2 ( 4*32+ 8) /* Thermal Monitor 2 */
+#define X86_FEATURE_SSSE3 ( 4*32+ 9) /* Supplemental SSE-3 */
+#define X86_FEATURE_CID ( 4*32+10) /* Context ID */
+#define X86_FEATURE_SDBG ( 4*32+11) /* Silicon Debug */
+#define X86_FEATURE_FMA ( 4*32+12) /* Fused multiply-add */
+#define X86_FEATURE_CX16 ( 4*32+13) /* CMPXCHG16B */
+#define X86_FEATURE_XTPR ( 4*32+14) /* Send Task Priority Messages */
+#define X86_FEATURE_PDCM ( 4*32+15) /* Performance Capabilities */
+#define X86_FEATURE_PCID ( 4*32+17) /* Process Context Identifiers */
+#define X86_FEATURE_DCA ( 4*32+18) /* Direct Cache Access */
+#define X86_FEATURE_XMM4_1 ( 4*32+19) /* "sse4_1" SSE-4.1 */
+#define X86_FEATURE_XMM4_2 ( 4*32+20) /* "sse4_2" SSE-4.2 */
+#define X86_FEATURE_X2APIC ( 4*32+21) /* x2APIC */
+#define X86_FEATURE_MOVBE ( 4*32+22) /* MOVBE instruction */
+#define X86_FEATURE_POPCNT ( 4*32+23) /* POPCNT instruction */
+#define X86_FEATURE_TSC_DEADLINE_TIMER ( 4*32+24) /* Tsc deadline timer */
+#define X86_FEATURE_AES ( 4*32+25) /* AES instructions */
+#define X86_FEATURE_XSAVE ( 4*32+26) /* XSAVE/XRSTOR/XSETBV/XGETBV */
+#define X86_FEATURE_OSXSAVE ( 4*32+27) /* "" XSAVE enabled in the OS */
+#define X86_FEATURE_AVX ( 4*32+28) /* Advanced Vector Extensions */
+#define X86_FEATURE_F16C ( 4*32+29) /* 16-bit fp conversions */
+#define X86_FEATURE_RDRAND ( 4*32+30) /* The RDRAND instruction */
+#define X86_FEATURE_HYPERVISOR ( 4*32+31) /* Running on a hypervisor */
+
+/* VIA/Cyrix/Centaur-defined CPU features, CPUID level 0xC0000001, word 5 */
+#define X86_FEATURE_XSTORE ( 5*32+ 2) /* "rng" RNG present (xstore) */
+#define X86_FEATURE_XSTORE_EN ( 5*32+ 3) /* "rng_en" RNG enabled */
+#define X86_FEATURE_XCRYPT ( 5*32+ 6) /* "ace" on-CPU crypto (xcrypt) */
+#define X86_FEATURE_XCRYPT_EN ( 5*32+ 7) /* "ace_en" on-CPU crypto enabled */
+#define X86_FEATURE_ACE2 ( 5*32+ 8) /* Advanced Cryptography Engine v2 */
+#define X86_FEATURE_ACE2_EN ( 5*32+ 9) /* ACE v2 enabled */
+#define X86_FEATURE_PHE ( 5*32+10) /* PadLock Hash Engine */
+#define X86_FEATURE_PHE_EN ( 5*32+11) /* PHE enabled */
+#define X86_FEATURE_PMM ( 5*32+12) /* PadLock Montgomery Multiplier */
+#define X86_FEATURE_PMM_EN ( 5*32+13) /* PMM enabled */
+
+/* More extended AMD flags: CPUID level 0x80000001, ecx, word 6 */
+#define X86_FEATURE_LAHF_LM ( 6*32+ 0) /* LAHF/SAHF in long mode */
+#define X86_FEATURE_CMP_LEGACY ( 6*32+ 1) /* If yes HyperThreading not valid */
+#define X86_FEATURE_SVM ( 6*32+ 2) /* Secure virtual machine */
+#define X86_FEATURE_EXTAPIC ( 6*32+ 3) /* Extended APIC space */
+#define X86_FEATURE_CR8_LEGACY ( 6*32+ 4) /* CR8 in 32-bit mode */
+#define X86_FEATURE_ABM ( 6*32+ 5) /* Advanced bit manipulation */
+#define X86_FEATURE_SSE4A ( 6*32+ 6) /* SSE-4A */
+#define X86_FEATURE_MISALIGNSSE ( 6*32+ 7) /* Misaligned SSE mode */
+#define X86_FEATURE_3DNOWPREFETCH ( 6*32+ 8) /* 3DNow prefetch instructions */
+#define X86_FEATURE_OSVW ( 6*32+ 9) /* OS Visible Workaround */
+#define X86_FEATURE_IBS ( 6*32+10) /* Instruction Based Sampling */
+#define X86_FEATURE_XOP ( 6*32+11) /* extended AVX instructions */
+#define X86_FEATURE_SKINIT ( 6*32+12) /* SKINIT/STGI instructions */
+#define X86_FEATURE_WDT ( 6*32+13) /* Watchdog timer */
+#define X86_FEATURE_LWP ( 6*32+15) /* Light Weight Profiling */
+#define X86_FEATURE_FMA4 ( 6*32+16) /* 4 operands MAC instructions */
+#define X86_FEATURE_TCE ( 6*32+17) /* translation cache extension */
+#define X86_FEATURE_NODEID_MSR ( 6*32+19) /* NodeId MSR */
+#define X86_FEATURE_TBM ( 6*32+21) /* trailing bit manipulations */
+#define X86_FEATURE_TOPOEXT ( 6*32+22) /* topology extensions CPUID leafs */
+#define X86_FEATURE_PERFCTR_CORE ( 6*32+23) /* core performance counter extensions */
+#define X86_FEATURE_PERFCTR_NB ( 6*32+24) /* NB performance counter extensions */
+#define X86_FEATURE_BPEXT (6*32+26) /* data breakpoint extension */
+#define X86_FEATURE_PERFCTR_L2 ( 6*32+28) /* L2 performance counter extensions */
+#define X86_FEATURE_MWAITX ( 6*32+29) /* MWAIT extension (MONITORX/MWAITX) */
+
+/*
+ * Auxiliary flags: Linux defined - For features scattered in various
+ * CPUID levels like 0x6, 0xA etc, word 7.
+ *
+ * Reuse free bits when adding new feature flags!
+ */
+
+#define X86_FEATURE_CPB ( 7*32+ 2) /* AMD Core Performance Boost */
+#define X86_FEATURE_EPB ( 7*32+ 3) /* IA32_ENERGY_PERF_BIAS support */
+
+#define X86_FEATURE_HW_PSTATE ( 7*32+ 8) /* AMD HW-PState */
+#define X86_FEATURE_PROC_FEEDBACK ( 7*32+ 9) /* AMD ProcFeedbackInterface */
+
+#define X86_FEATURE_INTEL_PT ( 7*32+15) /* Intel Processor Trace */
+
+/* Virtualization flags: Linux defined, word 8 */
+#define X86_FEATURE_TPR_SHADOW ( 8*32+ 0) /* Intel TPR Shadow */
+#define X86_FEATURE_VNMI ( 8*32+ 1) /* Intel Virtual NMI */
+#define X86_FEATURE_FLEXPRIORITY ( 8*32+ 2) /* Intel FlexPriority */
+#define X86_FEATURE_EPT ( 8*32+ 3) /* Intel Extended Page Table */
+#define X86_FEATURE_VPID ( 8*32+ 4) /* Intel Virtual Processor ID */
+
+#define X86_FEATURE_VMMCALL ( 8*32+15) /* Prefer vmmcall to vmcall */
+#define X86_FEATURE_XENPV ( 8*32+16) /* "" Xen paravirtual guest */
+
+
+/* Intel-defined CPU features, CPUID level 0x00000007:0 (ebx), word 9 */
+#define X86_FEATURE_FSGSBASE ( 9*32+ 0) /* {RD/WR}{FS/GS}BASE instructions*/
+#define X86_FEATURE_TSC_ADJUST ( 9*32+ 1) /* TSC adjustment MSR 0x3b */
+#define X86_FEATURE_BMI1 ( 9*32+ 3) /* 1st group bit manipulation extensions */
+#define X86_FEATURE_HLE ( 9*32+ 4) /* Hardware Lock Elision */
+#define X86_FEATURE_AVX2 ( 9*32+ 5) /* AVX2 instructions */
+#define X86_FEATURE_SMEP ( 9*32+ 7) /* Supervisor Mode Execution Protection */
+#define X86_FEATURE_BMI2 ( 9*32+ 8) /* 2nd group bit manipulation extensions */
+#define X86_FEATURE_ERMS ( 9*32+ 9) /* Enhanced REP MOVSB/STOSB */
+#define X86_FEATURE_INVPCID ( 9*32+10) /* Invalidate Processor Context ID */
+#define X86_FEATURE_RTM ( 9*32+11) /* Restricted Transactional Memory */
+#define X86_FEATURE_CQM ( 9*32+12) /* Cache QoS Monitoring */
+#define X86_FEATURE_MPX ( 9*32+14) /* Memory Protection Extension */
+#define X86_FEATURE_AVX512F ( 9*32+16) /* AVX-512 Foundation */
+#define X86_FEATURE_AVX512DQ ( 9*32+17) /* AVX-512 DQ (Double/Quad granular) Instructions */
+#define X86_FEATURE_RDSEED ( 9*32+18) /* The RDSEED instruction */
+#define X86_FEATURE_ADX ( 9*32+19) /* The ADCX and ADOX instructions */
+#define X86_FEATURE_SMAP ( 9*32+20) /* Supervisor Mode Access Prevention */
+#define X86_FEATURE_PCOMMIT ( 9*32+22) /* PCOMMIT instruction */
+#define X86_FEATURE_CLFLUSHOPT ( 9*32+23) /* CLFLUSHOPT instruction */
+#define X86_FEATURE_CLWB ( 9*32+24) /* CLWB instruction */
+#define X86_FEATURE_AVX512PF ( 9*32+26) /* AVX-512 Prefetch */
+#define X86_FEATURE_AVX512ER ( 9*32+27) /* AVX-512 Exponential and Reciprocal */
+#define X86_FEATURE_AVX512CD ( 9*32+28) /* AVX-512 Conflict Detection */
+#define X86_FEATURE_SHA_NI ( 9*32+29) /* SHA1/SHA256 Instruction Extensions */
+#define X86_FEATURE_AVX512BW ( 9*32+30) /* AVX-512 BW (Byte/Word granular) Instructions */
+#define X86_FEATURE_AVX512VL ( 9*32+31) /* AVX-512 VL (128/256 Vector Length) Extensions */
+
+/* Extended state features, CPUID level 0x0000000d:1 (eax), word 10 */
+#define X86_FEATURE_XSAVEOPT (10*32+ 0) /* XSAVEOPT */
+#define X86_FEATURE_XSAVEC (10*32+ 1) /* XSAVEC */
+#define X86_FEATURE_XGETBV1 (10*32+ 2) /* XGETBV with ECX = 1 */
+#define X86_FEATURE_XSAVES (10*32+ 3) /* XSAVES/XRSTORS */
+
+/* Intel-defined CPU QoS Sub-leaf, CPUID level 0x0000000F:0 (edx), word 11 */
+#define X86_FEATURE_CQM_LLC (11*32+ 1) /* LLC QoS if 1 */
+
+/* Intel-defined CPU QoS Sub-leaf, CPUID level 0x0000000F:1 (edx), word 12 */
+#define X86_FEATURE_CQM_OCCUP_LLC (12*32+ 0) /* LLC occupancy monitoring if 1 */
+
+/* AMD-defined CPU features, CPUID level 0x80000008 (ebx), word 13 */
+#define X86_FEATURE_CLZERO (13*32+0) /* CLZERO instruction */
+
+/* Thermal and Power Management Leaf, CPUID level 0x00000006 (eax), word 14 */
+#define X86_FEATURE_DTHERM (14*32+ 0) /* Digital Thermal Sensor */
+#define X86_FEATURE_IDA (14*32+ 1) /* Intel Dynamic Acceleration */
+#define X86_FEATURE_ARAT (14*32+ 2) /* Always Running APIC Timer */
+#define X86_FEATURE_PLN (14*32+ 4) /* Intel Power Limit Notification */
+#define X86_FEATURE_PTS (14*32+ 6) /* Intel Package Thermal Status */
+#define X86_FEATURE_HWP (14*32+ 7) /* Intel Hardware P-states */
+#define X86_FEATURE_HWP_NOTIFY (14*32+ 8) /* HWP Notification */
+#define X86_FEATURE_HWP_ACT_WINDOW (14*32+ 9) /* HWP Activity Window */
+#define X86_FEATURE_HWP_EPP (14*32+10) /* HWP Energy Perf. Preference */
+#define X86_FEATURE_HWP_PKG_REQ (14*32+11) /* HWP Package Level Request */
+
+/* AMD SVM Feature Identification, CPUID level 0x8000000a (edx), word 15 */
+#define X86_FEATURE_NPT (15*32+ 0) /* Nested Page Table support */
+#define X86_FEATURE_LBRV (15*32+ 1) /* LBR Virtualization support */
+#define X86_FEATURE_SVML (15*32+ 2) /* "svm_lock" SVM locking MSR */
+#define X86_FEATURE_NRIPS (15*32+ 3) /* "nrip_save" SVM next_rip save */
+#define X86_FEATURE_TSCRATEMSR (15*32+ 4) /* "tsc_scale" TSC scaling support */
+#define X86_FEATURE_VMCBCLEAN (15*32+ 5) /* "vmcb_clean" VMCB clean bits support */
+#define X86_FEATURE_FLUSHBYASID (15*32+ 6) /* flush-by-ASID support */
+#define X86_FEATURE_DECODEASSISTS (15*32+ 7) /* Decode Assists support */
+#define X86_FEATURE_PAUSEFILTER (15*32+10) /* filtered pause intercept */
+#define X86_FEATURE_PFTHRESHOLD (15*32+12) /* pause filter threshold */
+#define X86_FEATURE_AVIC (15*32+13) /* Virtual Interrupt Controller */
+
+/*
+ * BUG word(s)
+ */
+#define X86_BUG(x) (NCAPINTS*32 + (x))
+
+#define X86_BUG_F00F X86_BUG(0) /* Intel F00F */
+#define X86_BUG_FDIV X86_BUG(1) /* FPU FDIV */
+#define X86_BUG_COMA X86_BUG(2) /* Cyrix 6x86 coma */
+#define X86_BUG_AMD_TLB_MMATCH X86_BUG(3) /* "tlb_mmatch" AMD Erratum 383 */
+#define X86_BUG_AMD_APIC_C1E X86_BUG(4) /* "apic_c1e" AMD Erratum 400 */
+#define X86_BUG_11AP X86_BUG(5) /* Bad local APIC aka 11AP */
+#define X86_BUG_FXSAVE_LEAK X86_BUG(6) /* FXSAVE leaks FOP/FIP/FOP */
+#define X86_BUG_CLFLUSH_MONITOR X86_BUG(7) /* AAI65, CLFLUSH required before MONITOR */
+#define X86_BUG_SYSRET_SS_ATTRS X86_BUG(8) /* SYSRET doesn't fix up SS attrs */
+
+#ifdef CONFIG_X86_32
+/*
+ * 64-bit kernels don't use X86_BUG_ESPFIX. Make the define conditional
+ * to avoid confusion.
+ */
+#define X86_BUG_ESPFIX X86_BUG(9) /* "" IRET to 16-bit SS corrupts ESP/RSP high bits */
+#endif
+
+#endif /* _ASM_X86_CPUFEATURES_H */
diff --git a/arch/x86/include/asm/desc_defs.h b/arch/x86/include/asm/desc_defs.h
index 278441f39856e..eb5deb42484d5 100644
--- a/arch/x86/include/asm/desc_defs.h
+++ b/arch/x86/include/asm/desc_defs.h
@@ -98,4 +98,27 @@ struct desc_ptr {
#endif /* !__ASSEMBLY__ */
+/* Access rights as returned by LAR */
+#define AR_TYPE_RODATA (0 * (1 << 9))
+#define AR_TYPE_RWDATA (1 * (1 << 9))
+#define AR_TYPE_RODATA_EXPDOWN (2 * (1 << 9))
+#define AR_TYPE_RWDATA_EXPDOWN (3 * (1 << 9))
+#define AR_TYPE_XOCODE (4 * (1 << 9))
+#define AR_TYPE_XRCODE (5 * (1 << 9))
+#define AR_TYPE_XOCODE_CONF (6 * (1 << 9))
+#define AR_TYPE_XRCODE_CONF (7 * (1 << 9))
+#define AR_TYPE_MASK (7 * (1 << 9))
+
+#define AR_DPL0 (0 * (1 << 13))
+#define AR_DPL3 (3 * (1 << 13))
+#define AR_DPL_MASK (3 * (1 << 13))
+
+#define AR_A (1 << 8) /* "Accessed" */
+#define AR_S (1 << 12) /* If clear, "System" segment */
+#define AR_P (1 << 15) /* "Present" */
+#define AR_AVL (1 << 20) /* "AVaiLable" (no HW effect) */
+#define AR_L (1 << 21) /* "Long mode" for code segments */
+#define AR_DB (1 << 22) /* D/B, effect depends on type */
+#define AR_G (1 << 23) /* "Granularity" (limit in pages) */
+
#endif /* _ASM_X86_DESC_DEFS_H */
diff --git a/arch/x86/include/asm/dmi.h b/arch/x86/include/asm/dmi.h
index 535192f6bfad8..3c69fed215c56 100644
--- a/arch/x86/include/asm/dmi.h
+++ b/arch/x86/include/asm/dmi.h
@@ -15,7 +15,7 @@ static __always_inline __init void *dmi_alloc(unsigned len)
/* Use early IO mappings for DMI because it's initialized early */
#define dmi_early_remap early_ioremap
#define dmi_early_unmap early_iounmap
-#define dmi_remap ioremap
+#define dmi_remap ioremap_cache
#define dmi_unmap iounmap
#endif /* _ASM_X86_DMI_H */
diff --git a/arch/x86/include/asm/elf.h b/arch/x86/include/asm/elf.h
index 1514753fd4355..15340e36ddcb3 100644
--- a/arch/x86/include/asm/elf.h
+++ b/arch/x86/include/asm/elf.h
@@ -256,7 +256,7 @@ extern int force_personality32;
instruction set this CPU supports. This could be done in user space,
but it's not easy, and we've already done it here. */
-#define ELF_HWCAP (boot_cpu_data.x86_capability[0])
+#define ELF_HWCAP (boot_cpu_data.x86_capability[CPUID_1_EDX])
/* This yields a string that ld.so will use to load implementation
specific libraries for optimization. This is more specific in
diff --git a/arch/x86/include/asm/fixmap.h b/arch/x86/include/asm/fixmap.h
index 6d7d0e52ed5a5..8554f960e21b7 100644
--- a/arch/x86/include/asm/fixmap.h
+++ b/arch/x86/include/asm/fixmap.h
@@ -138,7 +138,7 @@ extern void reserve_top_address(unsigned long reserve);
extern int fixmaps_set;
extern pte_t *kmap_pte;
-extern pgprot_t kmap_prot;
+#define kmap_prot PAGE_KERNEL
extern pte_t *pkmap_page_table;
void __native_set_fixmap(enum fixed_addresses idx, pte_t pte);
diff --git a/arch/x86/include/asm/fpu/internal.h b/arch/x86/include/asm/fpu/internal.h
index 0fd440df63f18..a2124343edf54 100644
--- a/arch/x86/include/asm/fpu/internal.h
+++ b/arch/x86/include/asm/fpu/internal.h
@@ -17,6 +17,7 @@
#include
#include
#include
+#include
/*
* High level FPU state handling functions:
@@ -58,22 +59,22 @@ extern u64 fpu__get_supported_xfeatures_mask(void);
*/
static __always_inline __pure bool use_eager_fpu(void)
{
- return static_cpu_has_safe(X86_FEATURE_EAGER_FPU);
+ return static_cpu_has(X86_FEATURE_EAGER_FPU);
}
static __always_inline __pure bool use_xsaveopt(void)
{
- return static_cpu_has_safe(X86_FEATURE_XSAVEOPT);
+ return static_cpu_has(X86_FEATURE_XSAVEOPT);
}
static __always_inline __pure bool use_xsave(void)
{
- return static_cpu_has_safe(X86_FEATURE_XSAVE);
+ return static_cpu_has(X86_FEATURE_XSAVE);
}
static __always_inline __pure bool use_fxsr(void)
{
- return static_cpu_has_safe(X86_FEATURE_FXSR);
+ return static_cpu_has(X86_FEATURE_FXSR);
}
/*
@@ -300,7 +301,7 @@ static inline void copy_xregs_to_kernel_booting(struct xregs_state *xstate)
WARN_ON(system_state != SYSTEM_BOOTING);
- if (static_cpu_has_safe(X86_FEATURE_XSAVES))
+ if (static_cpu_has(X86_FEATURE_XSAVES))
XSTATE_OP(XSAVES, xstate, lmask, hmask, err);
else
XSTATE_OP(XSAVE, xstate, lmask, hmask, err);
@@ -322,7 +323,7 @@ static inline void copy_kernel_to_xregs_booting(struct xregs_state *xstate)
WARN_ON(system_state != SYSTEM_BOOTING);
- if (static_cpu_has_safe(X86_FEATURE_XSAVES))
+ if (static_cpu_has(X86_FEATURE_XSAVES))
XSTATE_OP(XRSTORS, xstate, lmask, hmask, err);
else
XSTATE_OP(XRSTOR, xstate, lmask, hmask, err);
@@ -460,7 +461,7 @@ static inline void copy_kernel_to_fpregs(union fpregs_state *fpstate)
* pending. Clear the x87 state here by setting it to fixed values.
* "m" is a random variable that should be in L1.
*/
- if (unlikely(static_cpu_has_bug_safe(X86_BUG_FXSAVE_LEAK))) {
+ if (unlikely(static_cpu_has_bug(X86_BUG_FXSAVE_LEAK))) {
asm volatile(
"fnclex\n\t"
"emms\n\t"
@@ -589,7 +590,8 @@ switch_fpu_prepare(struct fpu *old_fpu, struct fpu *new_fpu, int cpu)
* If the task has used the math, pre-load the FPU on xsave processors
* or if the past 5 consecutive context-switches used math.
*/
- fpu.preload = new_fpu->fpstate_active &&
+ fpu.preload = static_cpu_has(X86_FEATURE_FPU) &&
+ new_fpu->fpstate_active &&
(use_eager_fpu() || new_fpu->counter > 5);
if (old_fpu->fpregs_active) {
diff --git a/arch/x86/include/asm/fpu/xstate.h b/arch/x86/include/asm/fpu/xstate.h
index af30fdeb140da..f23cd8c80b1c8 100644
--- a/arch/x86/include/asm/fpu/xstate.h
+++ b/arch/x86/include/asm/fpu/xstate.h
@@ -20,16 +20,15 @@
/* Supported features which support lazy state saving */
#define XFEATURE_MASK_LAZY (XFEATURE_MASK_FP | \
- XFEATURE_MASK_SSE)
-
-/* Supported features which require eager state saving */
-#define XFEATURE_MASK_EAGER (XFEATURE_MASK_BNDREGS | \
- XFEATURE_MASK_BNDCSR | \
+ XFEATURE_MASK_SSE | \
XFEATURE_MASK_YMM | \
XFEATURE_MASK_OPMASK | \
XFEATURE_MASK_ZMM_Hi256 | \
XFEATURE_MASK_Hi16_ZMM)
+/* Supported features which require eager state saving */
+#define XFEATURE_MASK_EAGER (XFEATURE_MASK_BNDREGS | XFEATURE_MASK_BNDCSR)
+
/* All currently supported features */
#define XCNTXT_MASK (XFEATURE_MASK_LAZY | XFEATURE_MASK_EAGER)
diff --git a/arch/x86/include/asm/frame.h b/arch/x86/include/asm/frame.h
index 793179cf8e21a..6e4d170726b75 100644
--- a/arch/x86/include/asm/frame.h
+++ b/arch/x86/include/asm/frame.h
@@ -1,23 +1,44 @@
-#ifdef __ASSEMBLY__
+#ifndef _ASM_X86_FRAME_H
+#define _ASM_X86_FRAME_H
#include
-/* The annotation hides the frame from the unwinder and makes it look
- like a ordinary ebp save/restore. This avoids some special cases for
- frame pointer later */
+/*
+ * These are stack frame creation macros. They should be used by every
+ * callable non-leaf asm function to make kernel stack traces more reliable.
+ */
+
#ifdef CONFIG_FRAME_POINTER
- .macro FRAME
- __ASM_SIZE(push,) %__ASM_REG(bp)
- __ASM_SIZE(mov) %__ASM_REG(sp), %__ASM_REG(bp)
- .endm
- .macro ENDFRAME
- __ASM_SIZE(pop,) %__ASM_REG(bp)
- .endm
-#else
- .macro FRAME
- .endm
- .macro ENDFRAME
- .endm
-#endif
-
-#endif /* __ASSEMBLY__ */
+
+#ifdef __ASSEMBLY__
+
+.macro FRAME_BEGIN
+ push %_ASM_BP
+ _ASM_MOV %_ASM_SP, %_ASM_BP
+.endm
+
+.macro FRAME_END
+ pop %_ASM_BP
+.endm
+
+#else /* !__ASSEMBLY__ */
+
+#define FRAME_BEGIN \
+ "push %" _ASM_BP "\n" \
+ _ASM_MOV "%" _ASM_SP ", %" _ASM_BP "\n"
+
+#define FRAME_END "pop %" _ASM_BP "\n"
+
+#endif /* __ASSEMBLY__ */
+
+#define FRAME_OFFSET __ASM_SEL(4, 8)
+
+#else /* !CONFIG_FRAME_POINTER */
+
+#define FRAME_BEGIN
+#define FRAME_END
+#define FRAME_OFFSET 0
+
+#endif /* CONFIG_FRAME_POINTER */
+
+#endif /* _ASM_X86_FRAME_H */
diff --git a/arch/x86/include/asm/imr.h b/arch/x86/include/asm/imr.h
index cd2ce4068441b..ebea2c9d2cdc0 100644
--- a/arch/x86/include/asm/imr.h
+++ b/arch/x86/include/asm/imr.h
@@ -53,7 +53,7 @@
#define IMR_MASK (IMR_ALIGN - 1)
int imr_add_range(phys_addr_t base, size_t size,
- unsigned int rmask, unsigned int wmask, bool lock);
+ unsigned int rmask, unsigned int wmask);
int imr_remove_range(phys_addr_t base, size_t size);
diff --git a/arch/x86/include/asm/ipi.h b/arch/x86/include/asm/ipi.h
index cfc9a0d2d07ce..a4fe16e42b7b2 100644
--- a/arch/x86/include/asm/ipi.h
+++ b/arch/x86/include/asm/ipi.h
@@ -57,67 +57,13 @@ static inline void __xapic_wait_icr_idle(void)
cpu_relax();
}
-static inline void
-__default_send_IPI_shortcut(unsigned int shortcut, int vector, unsigned int dest)
-{
- /*
- * Subtle. In the case of the 'never do double writes' workaround
- * we have to lock out interrupts to be safe. As we don't care
- * of the value read we use an atomic rmw access to avoid costly
- * cli/sti. Otherwise we use an even cheaper single atomic write
- * to the APIC.
- */
- unsigned int cfg;
-
- /*
- * Wait for idle.
- */
- __xapic_wait_icr_idle();
-
- /*
- * No need to touch the target chip field
- */
- cfg = __prepare_ICR(shortcut, vector, dest);
-
- /*
- * Send the IPI. The write to APIC_ICR fires this off.
- */
- native_apic_mem_write(APIC_ICR, cfg);
-}
+void __default_send_IPI_shortcut(unsigned int shortcut, int vector, unsigned int dest);
/*
* This is used to send an IPI with no shorthand notation (the destination is
* specified in bits 56 to 63 of the ICR).
*/
-static inline void
- __default_send_IPI_dest_field(unsigned int mask, int vector, unsigned int dest)
-{
- unsigned long cfg;
-
- /*
- * Wait for idle.
- */
- if (unlikely(vector == NMI_VECTOR))
- safe_apic_wait_icr_idle();
- else
- __xapic_wait_icr_idle();
-
- /*
- * prepare target chip field
- */
- cfg = __prepare_ICR2(mask);
- native_apic_mem_write(APIC_ICR2, cfg);
-
- /*
- * program the ICR
- */
- cfg = __prepare_ICR(0, vector, dest);
-
- /*
- * Send the IPI. The write to APIC_ICR fires this off.
- */
- native_apic_mem_write(APIC_ICR, cfg);
-}
+void __default_send_IPI_dest_field(unsigned int mask, int vector, unsigned int dest);
extern void default_send_IPI_single(int cpu, int vector);
extern void default_send_IPI_single_phys(int cpu, int vector);
diff --git a/arch/x86/include/asm/irq_work.h b/arch/x86/include/asm/irq_work.h
index 78162f8e248bd..d0afb05c84fc1 100644
--- a/arch/x86/include/asm/irq_work.h
+++ b/arch/x86/include/asm/irq_work.h
@@ -1,7 +1,7 @@
#ifndef _ASM_IRQ_WORK_H
#define _ASM_IRQ_WORK_H
-#include
+#include
static inline bool arch_irq_work_has_interrupt(void)
{
diff --git a/arch/x86/include/asm/kvm_para.h b/arch/x86/include/asm/kvm_para.h
index c1adf33fdd0d6..bc62e7cbf1b1f 100644
--- a/arch/x86/include/asm/kvm_para.h
+++ b/arch/x86/include/asm/kvm_para.h
@@ -17,15 +17,8 @@ static inline bool kvm_check_and_clear_guest_paused(void)
}
#endif /* CONFIG_KVM_GUEST */
-#ifdef CONFIG_DEBUG_RODATA
#define KVM_HYPERCALL \
ALTERNATIVE(".byte 0x0f,0x01,0xc1", ".byte 0x0f,0x01,0xd9", X86_FEATURE_VMMCALL)
-#else
-/* On AMD processors, vmcall will generate a trap that we will
- * then rewrite to the appropriate instruction.
- */
-#define KVM_HYPERCALL ".byte 0x0f,0x01,0xc1"
-#endif
/* For KVM hypercalls, a three-byte sequence of either the vmcall or the vmmcall
* instruction. The hypervisor may replace it with something else but only the
diff --git a/arch/x86/include/asm/mce.h b/arch/x86/include/asm/mce.h
index 2ea4527e462f1..92b6f651fa4fc 100644
--- a/arch/x86/include/asm/mce.h
+++ b/arch/x86/include/asm/mce.h
@@ -40,8 +40,20 @@
#define MCI_STATUS_AR (1ULL<<55) /* Action required */
/* AMD-specific bits */
-#define MCI_STATUS_DEFERRED (1ULL<<44) /* declare an uncorrected error */
+#define MCI_STATUS_DEFERRED (1ULL<<44) /* uncorrected error, deferred exception */
#define MCI_STATUS_POISON (1ULL<<43) /* access poisonous data */
+#define MCI_STATUS_TCC (1ULL<<55) /* Task context corrupt */
+
+/*
+ * McaX field if set indicates a given bank supports MCA extensions:
+ * - Deferred error interrupt type is specifiable by bank.
+ * - MCx_MISC0[BlkPtr] field indicates presence of extended MISC registers,
+ * But should not be used to determine MSR numbers.
+ * - TCC bit is present in MCx_STATUS.
+ */
+#define MCI_CONFIG_MCAX 0x1
+#define MCI_IPID_MCATYPE 0xFFFF0000
+#define MCI_IPID_HWID 0xFFF
/*
* Note that the full MCACOD field of IA32_MCi_STATUS MSR is
@@ -91,6 +103,16 @@
#define MCE_LOG_LEN 32
#define MCE_LOG_SIGNATURE "MACHINECHECK"
+/* AMD Scalable MCA */
+#define MSR_AMD64_SMCA_MC0_MISC0 0xc0002003
+#define MSR_AMD64_SMCA_MC0_CONFIG 0xc0002004
+#define MSR_AMD64_SMCA_MC0_IPID 0xc0002005
+#define MSR_AMD64_SMCA_MC0_MISC1 0xc000200a
+#define MSR_AMD64_SMCA_MCx_MISC(x) (MSR_AMD64_SMCA_MC0_MISC0 + 0x10*(x))
+#define MSR_AMD64_SMCA_MCx_CONFIG(x) (MSR_AMD64_SMCA_MC0_CONFIG + 0x10*(x))
+#define MSR_AMD64_SMCA_MCx_IPID(x) (MSR_AMD64_SMCA_MC0_IPID + 0x10*(x))
+#define MSR_AMD64_SMCA_MCx_MISCy(x, y) ((MSR_AMD64_SMCA_MC0_MISC1 + y) + (0x10*(x)))
+
/*
* This structure contains all data related to the MCE log. Also
* carries a signature to make it easier to find from external
@@ -113,6 +135,7 @@ struct mca_config {
bool ignore_ce;
bool disabled;
bool ser;
+ bool recovery;
bool bios_cmci_threshold;
u8 banks;
s8 bootlog;
@@ -287,4 +310,49 @@ struct cper_sec_mem_err;
extern void apei_mce_report_mem_error(int corrected,
struct cper_sec_mem_err *mem_err);
+/*
+ * Enumerate new IP types and HWID values in AMD processors which support
+ * Scalable MCA.
+ */
+#ifdef CONFIG_X86_MCE_AMD
+enum amd_ip_types {
+ SMCA_F17H_CORE = 0, /* Core errors */
+ SMCA_DF, /* Data Fabric */
+ SMCA_UMC, /* Unified Memory Controller */
+ SMCA_PB, /* Parameter Block */
+ SMCA_PSP, /* Platform Security Processor */
+ SMCA_SMU, /* System Management Unit */
+ N_AMD_IP_TYPES
+};
+
+struct amd_hwid {
+ const char *name;
+ unsigned int hwid;
+};
+
+extern struct amd_hwid amd_hwids[N_AMD_IP_TYPES];
+
+enum amd_core_mca_blocks {
+ SMCA_LS = 0, /* Load Store */
+ SMCA_IF, /* Instruction Fetch */
+ SMCA_L2_CACHE, /* L2 cache */
+ SMCA_DE, /* Decoder unit */
+ RES, /* Reserved */
+ SMCA_EX, /* Execution unit */
+ SMCA_FP, /* Floating Point */
+ SMCA_L3_CACHE, /* L3 cache */
+ N_CORE_MCA_BLOCKS
+};
+
+extern const char * const amd_core_mcablock_names[N_CORE_MCA_BLOCKS];
+
+enum amd_df_mca_blocks {
+ SMCA_CS = 0, /* Coherent Slave */
+ SMCA_PIE, /* Power management, Interrupts, etc */
+ N_DF_BLOCKS
+};
+
+extern const char * const amd_df_mcablock_names[N_DF_BLOCKS];
+#endif
+
#endif /* _ASM_X86_MCE_H */
diff --git a/arch/x86/include/asm/microcode.h b/arch/x86/include/asm/microcode.h
index 1e1b07a5a7388..9d3a96c4da789 100644
--- a/arch/x86/include/asm/microcode.h
+++ b/arch/x86/include/asm/microcode.h
@@ -3,6 +3,7 @@
#include
#include
+#include
#define native_rdmsr(msr, val1, val2) \
do { \
@@ -143,4 +144,29 @@ static inline void reload_early_microcode(void) { }
static inline bool
get_builtin_firmware(struct cpio_data *cd, const char *name) { return false; }
#endif
+
+static inline unsigned long get_initrd_start(void)
+{
+#ifdef CONFIG_BLK_DEV_INITRD
+ return initrd_start;
+#else
+ return 0;
+#endif
+}
+
+static inline unsigned long get_initrd_start_addr(void)
+{
+#ifdef CONFIG_BLK_DEV_INITRD
+#ifdef CONFIG_X86_32
+ unsigned long *initrd_start_p = (unsigned long *)__pa_nodebug(&initrd_start);
+
+ return (unsigned long)__pa_nodebug(*initrd_start_p);
+#else
+ return get_initrd_start();
+#endif
+#else /* CONFIG_BLK_DEV_INITRD */
+ return 0;
+#endif
+}
+
#endif /* _ASM_X86_MICROCODE_H */
diff --git a/arch/x86/include/asm/microcode_intel.h b/arch/x86/include/asm/microcode_intel.h
index 8559b0102ea1f..603417f8dd6cf 100644
--- a/arch/x86/include/asm/microcode_intel.h
+++ b/arch/x86/include/asm/microcode_intel.h
@@ -40,7 +40,6 @@ struct extended_sigtable {
#define DEFAULT_UCODE_TOTALSIZE (DEFAULT_UCODE_DATASIZE + MC_HEADER_SIZE)
#define EXT_HEADER_SIZE (sizeof(struct extended_sigtable))
#define EXT_SIGNATURE_SIZE (sizeof(struct extended_signature))
-#define DWSIZE (sizeof(u32))
#define get_totalsize(mc) \
(((struct microcode_intel *)mc)->hdr.datasize ? \
diff --git a/arch/x86/include/asm/mmu.h b/arch/x86/include/asm/mmu.h
index 55234d5e7160d..1ea0baef1175c 100644
--- a/arch/x86/include/asm/mmu.h
+++ b/arch/x86/include/asm/mmu.h
@@ -19,7 +19,8 @@ typedef struct {
#endif
struct mutex lock;
- void __user *vdso;
+ void __user *vdso; /* vdso base address */
+ const struct vdso_image *vdso_image; /* vdso image in use */
atomic_t perf_rdpmc_allowed; /* nonzero if rdpmc is allowed */
} mm_context_t;
diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h
index b05402ef3b842..984ab75bf6218 100644
--- a/arch/x86/include/asm/msr-index.h
+++ b/arch/x86/include/asm/msr-index.h
@@ -1,7 +1,12 @@
#ifndef _ASM_X86_MSR_INDEX_H
#define _ASM_X86_MSR_INDEX_H
-/* CPU model specific register (MSR) numbers */
+/*
+ * CPU model specific register (MSR) numbers.
+ *
+ * Do not add new entries to this file unless the definitions are shared
+ * between multiple compilation units.
+ */
/* x86-64 specific MSRs */
#define MSR_EFER 0xc0000080 /* extended feature register */
diff --git a/arch/x86/include/asm/mwait.h b/arch/x86/include/asm/mwait.h
index c70689b5e5aa4..0deeb2d26df7c 100644
--- a/arch/x86/include/asm/mwait.h
+++ b/arch/x86/include/asm/mwait.h
@@ -3,6 +3,8 @@
#include
+#include
+
#define MWAIT_SUBSTATE_MASK 0xf
#define MWAIT_CSTATE_MASK 0xf
#define MWAIT_SUBSTATE_SIZE 4
diff --git a/arch/x86/include/asm/perf_event.h b/arch/x86/include/asm/perf_event.h
index 7bcb861a04e5c..5a2ed3ed2f261 100644
--- a/arch/x86/include/asm/perf_event.h
+++ b/arch/x86/include/asm/perf_event.h
@@ -165,6 +165,7 @@ struct x86_pmu_capability {
#define GLOBAL_STATUS_ASIF BIT_ULL(60)
#define GLOBAL_STATUS_COUNTERS_FROZEN BIT_ULL(59)
#define GLOBAL_STATUS_LBRS_FROZEN BIT_ULL(58)
+#define GLOBAL_STATUS_TRACE_TOPAPMI BIT_ULL(55)
/*
* IBS cpuid feature detection
diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
index 20c11d1aa4ccc..983738ac014c6 100644
--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -13,7 +13,7 @@ struct vm86;
#include
#include
#include
-#include
+#include
#include
#include
#include
@@ -24,7 +24,6 @@ struct vm86;
#include
#include
-#include
#include
#include
#include
@@ -129,6 +128,8 @@ struct cpuinfo_x86 {
u16 booted_cores;
/* Physical processor id: */
u16 phys_proc_id;
+ /* Logical processor id: */
+ u16 logical_proc_id;
/* Core id: */
u16 cpu_core_id;
/* Compute unit id */
@@ -298,10 +299,13 @@ struct tss_struct {
*/
unsigned long io_bitmap[IO_BITMAP_LONGS + 1];
+#ifdef CONFIG_X86_32
/*
- * Space for the temporary SYSENTER stack:
+ * Space for the temporary SYSENTER stack.
*/
+ unsigned long SYSENTER_stack_canary;
unsigned long SYSENTER_stack[64];
+#endif
} ____cacheline_aligned;
diff --git a/arch/x86/include/asm/proto.h b/arch/x86/include/asm/proto.h
index a4a77286cb1dd..9b9b30b194418 100644
--- a/arch/x86/include/asm/proto.h
+++ b/arch/x86/include/asm/proto.h
@@ -7,12 +7,23 @@
void syscall_init(void);
+#ifdef CONFIG_X86_64
void entry_SYSCALL_64(void);
-void entry_SYSCALL_compat(void);
+#endif
+
+#ifdef CONFIG_X86_32
void entry_INT80_32(void);
-void entry_INT80_compat(void);
void entry_SYSENTER_32(void);
+void __begin_SYSENTER_singlestep_region(void);
+void __end_SYSENTER_singlestep_region(void);
+#endif
+
+#ifdef CONFIG_IA32_EMULATION
void entry_SYSENTER_compat(void);
+void __end_entry_SYSENTER_compat(void);
+void entry_SYSCALL_compat(void);
+void entry_INT80_compat(void);
+#endif
void x86_configure_nx(void);
void x86_report_nx(void);
diff --git a/arch/x86/include/asm/sections.h b/arch/x86/include/asm/sections.h
index 0a52424286590..13b6cdd0af570 100644
--- a/arch/x86/include/asm/sections.h
+++ b/arch/x86/include/asm/sections.h
@@ -7,7 +7,7 @@
extern char __brk_base[], __brk_limit[];
extern struct exception_table_entry __stop___ex_table[];
-#if defined(CONFIG_X86_64) && defined(CONFIG_DEBUG_RODATA)
+#if defined(CONFIG_X86_64)
extern char __end_rodata_hpage_align[];
#endif
diff --git a/arch/x86/include/asm/sighandling.h b/arch/x86/include/asm/sighandling.h
index 89db46752a8f0..452c88b8ad069 100644
--- a/arch/x86/include/asm/sighandling.h
+++ b/arch/x86/include/asm/sighandling.h
@@ -13,7 +13,6 @@
X86_EFLAGS_CF | X86_EFLAGS_RF)
void signal_fault(struct pt_regs *regs, void __user *frame, char *where);
-int restore_sigcontext(struct pt_regs *regs, struct sigcontext __user *sc);
int setup_sigcontext(struct sigcontext __user *sc, void __user *fpstate,
struct pt_regs *regs, unsigned long mask);
diff --git a/arch/x86/include/asm/smap.h b/arch/x86/include/asm/smap.h
index ba665ebd17bb8..db333300bd4be 100644
--- a/arch/x86/include/asm/smap.h
+++ b/arch/x86/include/asm/smap.h
@@ -15,7 +15,7 @@
#include
#include
-#include
+#include
/* "Raw" instruction opcodes */
#define __ASM_CLAC .byte 0x0f,0x01,0xca
diff --git a/arch/x86/include/asm/smp.h b/arch/x86/include/asm/smp.h
index dfcf0727623b3..20a3de5cb3b0d 100644
--- a/arch/x86/include/asm/smp.h
+++ b/arch/x86/include/asm/smp.h
@@ -16,7 +16,6 @@
#endif
#include
#include
-#include
extern int smp_num_siblings;
extern unsigned int num_processors;
diff --git a/arch/x86/include/asm/string_64.h b/arch/x86/include/asm/string_64.h
index ff8b9a17dc4b2..ca6ba36077054 100644
--- a/arch/x86/include/asm/string_64.h
+++ b/arch/x86/include/asm/string_64.h
@@ -78,6 +78,19 @@ int strcmp(const char *cs, const char *ct);
#define memset(s, c, n) __memset(s, c, n)
#endif
+/**
+ * memcpy_mcsafe - copy memory with indication if a machine check happened
+ *
+ * @dst: destination address
+ * @src: source address
+ * @cnt: number of bytes to copy
+ *
+ * Low level memory copy function that catches machine checks
+ *
+ * Return true for success, false for fail
+ */
+bool memcpy_mcsafe(void *dst, const void *src, size_t cnt);
+
#endif /* __KERNEL__ */
#endif /* _ASM_X86_STRING_64_H */
diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h
index c7b551028740f..82866697fcf18 100644
--- a/arch/x86/include/asm/thread_info.h
+++ b/arch/x86/include/asm/thread_info.h
@@ -49,7 +49,7 @@
*/
#ifndef __ASSEMBLY__
struct task_struct;
-#include
+#include
#include
struct thread_info {
@@ -134,10 +134,13 @@ struct thread_info {
#define _TIF_ADDR32 (1 << TIF_ADDR32)
#define _TIF_X32 (1 << TIF_X32)
-/* work to do in syscall_trace_enter() */
+/*
+ * work to do in syscall_trace_enter(). Also includes TIF_NOHZ for
+ * enter_from_user_mode()
+ */
#define _TIF_WORK_SYSCALL_ENTRY \
(_TIF_SYSCALL_TRACE | _TIF_SYSCALL_EMU | _TIF_SYSCALL_AUDIT | \
- _TIF_SECCOMP | _TIF_SINGLESTEP | _TIF_SYSCALL_TRACEPOINT | \
+ _TIF_SECCOMP | _TIF_SYSCALL_TRACEPOINT | \
_TIF_NOHZ)
/* work to do on any return to user space */
diff --git a/arch/x86/include/asm/tlbflush.h b/arch/x86/include/asm/tlbflush.h
index 6df2029405a3a..c24b4224d4392 100644
--- a/arch/x86/include/asm/tlbflush.h
+++ b/arch/x86/include/asm/tlbflush.h
@@ -5,8 +5,57 @@
#include
#include
+#include
#include
+static inline void __invpcid(unsigned long pcid, unsigned long addr,
+ unsigned long type)
+{
+ struct { u64 d[2]; } desc = { { pcid, addr } };
+
+ /*
+ * The memory clobber is because the whole point is to invalidate
+ * stale TLB entries and, especially if we're flushing global
+ * mappings, we don't want the compiler to reorder any subsequent
+ * memory accesses before the TLB flush.
+ *
+ * The hex opcode is invpcid (%ecx), %eax in 32-bit mode and
+ * invpcid (%rcx), %rax in long mode.
+ */
+ asm volatile (".byte 0x66, 0x0f, 0x38, 0x82, 0x01"
+ : : "m" (desc), "a" (type), "c" (&desc) : "memory");
+}
+
+#define INVPCID_TYPE_INDIV_ADDR 0
+#define INVPCID_TYPE_SINGLE_CTXT 1
+#define INVPCID_TYPE_ALL_INCL_GLOBAL 2
+#define INVPCID_TYPE_ALL_NON_GLOBAL 3
+
+/* Flush all mappings for a given pcid and addr, not including globals. */
+static inline void invpcid_flush_one(unsigned long pcid,
+ unsigned long addr)
+{
+ __invpcid(pcid, addr, INVPCID_TYPE_INDIV_ADDR);
+}
+
+/* Flush all mappings for a given PCID, not including globals. */
+static inline void invpcid_flush_single_context(unsigned long pcid)
+{
+ __invpcid(pcid, 0, INVPCID_TYPE_SINGLE_CTXT);
+}
+
+/* Flush all mappings, including globals, for all PCIDs. */
+static inline void invpcid_flush_all(void)
+{
+ __invpcid(0, 0, INVPCID_TYPE_ALL_INCL_GLOBAL);
+}
+
+/* Flush all mappings for all PCIDs except globals. */
+static inline void invpcid_flush_all_nonglobals(void)
+{
+ __invpcid(0, 0, INVPCID_TYPE_ALL_NON_GLOBAL);
+}
+
#ifdef CONFIG_PARAVIRT
#include
#else
@@ -104,6 +153,15 @@ static inline void __native_flush_tlb_global(void)
{
unsigned long flags;
+ if (static_cpu_has(X86_FEATURE_INVPCID)) {
+ /*
+ * Using INVPCID is considerably faster than a pair of writes
+ * to CR4 sandwiched inside an IRQ flag save/restore.
+ */
+ invpcid_flush_all();
+ return;
+ }
+
/*
* Read-modify-write to CR4 - protect it from preemption and
* from interrupts. (Use the raw variant because this code can
diff --git a/arch/x86/include/asm/topology.h b/arch/x86/include/asm/topology.h
index 0fb46482dfde1..7f991bd5031b2 100644
--- a/arch/x86/include/asm/topology.h
+++ b/arch/x86/include/asm/topology.h
@@ -119,12 +119,23 @@ static inline void setup_node_to_cpumask_map(void) { }
extern const struct cpumask *cpu_coregroup_mask(int cpu);
+#define topology_logical_package_id(cpu) (cpu_data(cpu).logical_proc_id)
#define topology_physical_package_id(cpu) (cpu_data(cpu).phys_proc_id)
#define topology_core_id(cpu) (cpu_data(cpu).cpu_core_id)
#ifdef ENABLE_TOPO_DEFINES
#define topology_core_cpumask(cpu) (per_cpu(cpu_core_map, cpu))
#define topology_sibling_cpumask(cpu) (per_cpu(cpu_sibling_map, cpu))
+
+extern unsigned int __max_logical_packages;
+#define topology_max_packages() (__max_logical_packages)
+int topology_update_package_map(unsigned int apicid, unsigned int cpu);
+extern int topology_phys_to_logical_pkg(unsigned int pkg);
+#else
+#define topology_max_packages() (1)
+static inline int
+topology_update_package_map(unsigned int apicid, unsigned int cpu) { return 0; }
+static inline int topology_phys_to_logical_pkg(unsigned int pkg) { return 0; }
#endif
static inline void arch_fix_phys_package_id(int num, u32 slot)
diff --git a/arch/x86/include/asm/tsc.h b/arch/x86/include/asm/tsc.h
index 6d7c5479bceae..174c4212780af 100644
--- a/arch/x86/include/asm/tsc.h
+++ b/arch/x86/include/asm/tsc.h
@@ -29,6 +29,8 @@ static inline cycles_t get_cycles(void)
return rdtsc();
}
+extern struct system_counterval_t convert_art_to_tsc(cycle_t art);
+
extern void tsc_init(void);
extern void mark_tsc_unstable(char *reason);
extern int unsynchronized_tsc(void);
diff --git a/arch/x86/include/asm/uaccess.h b/arch/x86/include/asm/uaccess.h
index a4a30e4b2d343..c0f27d7ea7ff9 100644
--- a/arch/x86/include/asm/uaccess.h
+++ b/arch/x86/include/asm/uaccess.h
@@ -90,12 +90,11 @@ static inline bool __chk_range_not_ok(unsigned long addr, unsigned long size, un
likely(!__range_not_ok(addr, size, user_addr_max()))
/*
- * The exception table consists of pairs of addresses relative to the
- * exception table enty itself: the first is the address of an
- * instruction that is allowed to fault, and the second is the address
- * at which the program should continue. No registers are modified,
- * so it is entirely up to the continuation code to figure out what to
- * do.
+ * The exception table consists of triples of addresses relative to the
+ * exception table entry itself. The first address is of an instruction
+ * that is allowed to fault, the second is the target at which the program
+ * should continue. The third is a handler function to deal with the fault
+ * caused by the instruction in the first field.
*
* All the routines below use bits of fixup code that are out of line
* with the main instruction path. This means when everything is well,
@@ -104,13 +103,14 @@ static inline bool __chk_range_not_ok(unsigned long addr, unsigned long size, un
*/
struct exception_table_entry {
- int insn, fixup;
+ int insn, fixup, handler;
};
/* This is not the generic standard exception_table_entry format */
#define ARCH_HAS_SORT_EXTABLE
#define ARCH_HAS_SEARCH_EXTABLE
-extern int fixup_exception(struct pt_regs *regs);
+extern int fixup_exception(struct pt_regs *regs, int trapnr);
+extern bool ex_has_fault_handler(unsigned long ip);
extern int early_fixup_exception(unsigned long *ip);
/*
diff --git a/arch/x86/include/asm/uaccess_64.h b/arch/x86/include/asm/uaccess_64.h
index b89c34c4019b5..307698688fa1c 100644
--- a/arch/x86/include/asm/uaccess_64.h
+++ b/arch/x86/include/asm/uaccess_64.h
@@ -8,7 +8,7 @@
#include
#include
#include
-#include
+#include
#include
/*
diff --git a/arch/x86/include/asm/vdso.h b/arch/x86/include/asm/vdso.h
index deabaf9759b64..43dc55be524e7 100644
--- a/arch/x86/include/asm/vdso.h
+++ b/arch/x86/include/asm/vdso.h
@@ -13,9 +13,6 @@ struct vdso_image {
void *data;
unsigned long size; /* Always a multiple of PAGE_SIZE */
- /* text_mapping.pages is big enough for data/size page pointers */
- struct vm_special_mapping text_mapping;
-
unsigned long alt, alt_len;
long sym_vvar_start; /* Negative offset to the vvar area */
diff --git a/arch/x86/include/asm/vgtod.h b/arch/x86/include/asm/vgtod.h
index f556c4843aa18..e728699db7741 100644
--- a/arch/x86/include/asm/vgtod.h
+++ b/arch/x86/include/asm/vgtod.h
@@ -37,6 +37,12 @@ struct vsyscall_gtod_data {
};
extern struct vsyscall_gtod_data vsyscall_gtod_data;
+extern int vclocks_used;
+static inline bool vclock_was_used(int vclock)
+{
+ return READ_ONCE(vclocks_used) & (1 << vclock);
+}
+
static inline unsigned gtod_read_begin(const struct vsyscall_gtod_data *s)
{
unsigned ret;
diff --git a/arch/x86/include/uapi/asm/sigcontext.h b/arch/x86/include/uapi/asm/sigcontext.h
index d485232f1e9f9..62d4111c1c549 100644
--- a/arch/x86/include/uapi/asm/sigcontext.h
+++ b/arch/x86/include/uapi/asm/sigcontext.h
@@ -256,7 +256,7 @@ struct sigcontext_64 {
__u16 cs;
__u16 gs;
__u16 fs;
- __u16 __pad0;
+ __u16 ss;
__u64 err;
__u64 trapno;
__u64 oldmask;
@@ -341,9 +341,37 @@ struct sigcontext {
__u64 rip;
__u64 eflags; /* RFLAGS */
__u16 cs;
+
+ /*
+ * Prior to 2.5.64 ("[PATCH] x86-64 updates for 2.5.64-bk3"),
+ * Linux saved and restored fs and gs in these slots. This
+ * was counterproductive, as fsbase and gsbase were never
+ * saved, so arch_prctl was presumably unreliable.
+ *
+ * These slots should never be reused without extreme caution:
+ *
+ * - Some DOSEMU versions stash fs and gs in these slots manually,
+ * thus overwriting anything the kernel expects to be preserved
+ * in these slots.
+ *
+ * - If these slots are ever needed for any other purpose,
+ * there is some risk that very old 64-bit binaries could get
+ * confused. I doubt that many such binaries still work,
+ * though, since the same patch in 2.5.64 also removed the
+ * 64-bit set_thread_area syscall, so it appears that there
+ * is no TLS API beyond modify_ldt that works in both pre-
+ * and post-2.5.64 kernels.
+ *
+ * If the kernel ever adds explicit fs, gs, fsbase, and gsbase
+ * save/restore, it will most likely need to be opt-in and use
+ * different context slots.
+ */
__u16 gs;
__u16 fs;
- __u16 __pad0;
+ union {
+ __u16 ss; /* If UC_SIGCONTEXT_SS */
+ __u16 __pad0; /* Alias name for old (!UC_SIGCONTEXT_SS) user-space */
+ };
__u64 err;
__u64 trapno;
__u64 oldmask;
diff --git a/arch/x86/include/uapi/asm/ucontext.h b/arch/x86/include/uapi/asm/ucontext.h
index b7c29c8017f28..e3d1ec90616ed 100644
--- a/arch/x86/include/uapi/asm/ucontext.h
+++ b/arch/x86/include/uapi/asm/ucontext.h
@@ -1,11 +1,54 @@
#ifndef _ASM_X86_UCONTEXT_H
#define _ASM_X86_UCONTEXT_H
-#define UC_FP_XSTATE 0x1 /* indicates the presence of extended state
- * information in the memory layout pointed
- * by the fpstate pointer in the ucontext's
- * sigcontext struct (uc_mcontext).
- */
+/*
+ * Indicates the presence of extended state information in the memory
+ * layout pointed by the fpstate pointer in the ucontext's sigcontext
+ * struct (uc_mcontext).
+ */
+#define UC_FP_XSTATE 0x1
+
+#ifdef __x86_64__
+/*
+ * UC_SIGCONTEXT_SS will be set when delivering 64-bit or x32 signals on
+ * kernels that save SS in the sigcontext. All kernels that set
+ * UC_SIGCONTEXT_SS will correctly restore at least the low 32 bits of esp
+ * regardless of SS (i.e. they implement espfix).
+ *
+ * Kernels that set UC_SIGCONTEXT_SS will also set UC_STRICT_RESTORE_SS
+ * when delivering a signal that came from 64-bit code.
+ *
+ * Sigreturn restores SS as follows:
+ *
+ * if (saved SS is valid || UC_STRICT_RESTORE_SS is set ||
+ * saved CS is not 64-bit)
+ * new SS = saved SS (will fail IRET and signal if invalid)
+ * else
+ * new SS = a flat 32-bit data segment
+ *
+ * This behavior serves three purposes:
+ *
+ * - Legacy programs that construct a 64-bit sigcontext from scratch
+ * with zero or garbage in the SS slot (e.g. old CRIU) and call
+ * sigreturn will still work.
+ *
+ * - Old DOSEMU versions sometimes catch a signal from a segmented
+ * context, delete the old SS segment (with modify_ldt), and change
+ * the saved CS to a 64-bit segment. These DOSEMU versions expect
+ * sigreturn to send them back to 64-bit mode without killing them,
+ * despite the fact that the SS selector when the signal was raised is
+ * no longer valid. UC_STRICT_RESTORE_SS will be clear, so the kernel
+ * will fix up SS for these DOSEMU versions.
+ *
+ * - Old and new programs that catch a signal and return without
+ * modifying the saved context will end up in exactly the state they
+ * started in, even if they were running in a segmented context when
+ * the signal was raised.. Old kernels would lose track of the
+ * previous SS value.
+ */
+#define UC_SIGCONTEXT_SS 0x2
+#define UC_STRICT_RESTORE_SS 0x4
+#endif
#include
diff --git a/arch/x86/kernel/acpi/sleep.c b/arch/x86/kernel/acpi/sleep.c
index d1daead5fcddd..adb3eaf8fe2a5 100644
--- a/arch/x86/kernel/acpi/sleep.c
+++ b/arch/x86/kernel/acpi/sleep.c
@@ -16,6 +16,7 @@
#include
#include
+#include
#include "../../realmode/rm/wakeup.h"
#include "sleep.h"
@@ -107,7 +108,13 @@ int x86_acpi_suspend_lowlevel(void)
saved_magic = 0x123456789abcdef0L;
#endif /* CONFIG_64BIT */
+ /*
+ * Pause/unpause graph tracing around do_suspend_lowlevel as it has
+ * inconsistent call/return info after it jumps to the wakeup vector.
+ */
+ pause_graph_tracing();
do_suspend_lowlevel();
+ unpause_graph_tracing();
return 0;
}
diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c
index 8a5cddac7d444..531b9611c51d5 100644
--- a/arch/x86/kernel/apic/apic.c
+++ b/arch/x86/kernel/apic/apic.c
@@ -2077,6 +2077,20 @@ int generic_processor_info(int apicid, int version)
} else
cpu = cpumask_next_zero(-1, cpu_present_mask);
+ /*
+ * This can happen on physical hotplug. The sanity check at boot time
+ * is done from native_smp_prepare_cpus() after num_possible_cpus() is
+ * established.
+ */
+ if (topology_update_package_map(apicid, cpu) < 0) {
+ int thiscpu = max + disabled_cpus;
+
+ pr_warning("ACPI: Package limit reached. Processor %d/0x%x ignored.\n",
+ thiscpu, apicid);
+ disabled_cpus++;
+ return -ENOSPC;
+ }
+
/*
* Validate version
*/
diff --git a/arch/x86/kernel/apic/apic_flat_64.c b/arch/x86/kernel/apic/apic_flat_64.c
index 9968f30cca3e1..76f89e2b245af 100644
--- a/arch/x86/kernel/apic/apic_flat_64.c
+++ b/arch/x86/kernel/apic/apic_flat_64.c
@@ -53,7 +53,7 @@ void flat_init_apic_ldr(void)
apic_write(APIC_LDR, val);
}
-static inline void _flat_send_IPI_mask(unsigned long mask, int vector)
+static void _flat_send_IPI_mask(unsigned long mask, int vector)
{
unsigned long flags;
diff --git a/arch/x86/kernel/apic/apic_numachip.c b/arch/x86/kernel/apic/apic_numachip.c
index c80c02c6ec494..ab5c2c685a3ce 100644
--- a/arch/x86/kernel/apic/apic_numachip.c
+++ b/arch/x86/kernel/apic/apic_numachip.c
@@ -30,7 +30,7 @@ static unsigned int numachip1_get_apic_id(unsigned long x)
unsigned long value;
unsigned int id = (x >> 24) & 0xff;
- if (static_cpu_has_safe(X86_FEATURE_NODEID_MSR)) {
+ if (static_cpu_has(X86_FEATURE_NODEID_MSR)) {
rdmsrl(MSR_FAM10H_NODE_ID, value);
id |= (value << 2) & 0xff00;
}
@@ -178,7 +178,7 @@ static void fixup_cpu_id(struct cpuinfo_x86 *c, int node)
this_cpu_write(cpu_llc_id, node);
/* Account for nodes per socket in multi-core-module processors */
- if (static_cpu_has_safe(X86_FEATURE_NODEID_MSR)) {
+ if (static_cpu_has(X86_FEATURE_NODEID_MSR)) {
rdmsrl(MSR_FAM10H_NODE_ID, val);
nodes = ((val >> 3) & 7) + 1;
}
diff --git a/arch/x86/kernel/apic/ipi.c b/arch/x86/kernel/apic/ipi.c
index eb45fc9b61248..28bde88b0085d 100644
--- a/arch/x86/kernel/apic/ipi.c
+++ b/arch/x86/kernel/apic/ipi.c
@@ -18,6 +18,66 @@
#include
#include
+void __default_send_IPI_shortcut(unsigned int shortcut, int vector, unsigned int dest)
+{
+ /*
+ * Subtle. In the case of the 'never do double writes' workaround
+ * we have to lock out interrupts to be safe. As we don't care
+ * of the value read we use an atomic rmw access to avoid costly
+ * cli/sti. Otherwise we use an even cheaper single atomic write
+ * to the APIC.
+ */
+ unsigned int cfg;
+
+ /*
+ * Wait for idle.
+ */
+ __xapic_wait_icr_idle();
+
+ /*
+ * No need to touch the target chip field
+ */
+ cfg = __prepare_ICR(shortcut, vector, dest);
+
+ /*
+ * Send the IPI. The write to APIC_ICR fires this off.
+ */
+ native_apic_mem_write(APIC_ICR, cfg);
+}
+
+/*
+ * This is used to send an IPI with no shorthand notation (the destination is
+ * specified in bits 56 to 63 of the ICR).
+ */
+void __default_send_IPI_dest_field(unsigned int mask, int vector, unsigned int dest)
+{
+ unsigned long cfg;
+
+ /*
+ * Wait for idle.
+ */
+ if (unlikely(vector == NMI_VECTOR))
+ safe_apic_wait_icr_idle();
+ else
+ __xapic_wait_icr_idle();
+
+ /*
+ * prepare target chip field
+ */
+ cfg = __prepare_ICR2(mask);
+ native_apic_mem_write(APIC_ICR2, cfg);
+
+ /*
+ * program the ICR
+ */
+ cfg = __prepare_ICR(0, vector, dest);
+
+ /*
+ * Send the IPI. The write to APIC_ICR fires this off.
+ */
+ native_apic_mem_write(APIC_ICR, cfg);
+}
+
void default_send_IPI_single_phys(int cpu, int vector)
{
unsigned long flags;
diff --git a/arch/x86/kernel/asm-offsets.c b/arch/x86/kernel/asm-offsets.c
index 84a7524b202ca..5c042466f274d 100644
--- a/arch/x86/kernel/asm-offsets.c
+++ b/arch/x86/kernel/asm-offsets.c
@@ -59,7 +59,6 @@ void common(void) {
#ifdef CONFIG_PARAVIRT
BLANK();
- OFFSET(PARAVIRT_enabled, pv_info, paravirt_enabled);
OFFSET(PARAVIRT_PATCH_pv_cpu_ops, paravirt_patch_template, pv_cpu_ops);
OFFSET(PARAVIRT_PATCH_pv_irq_ops, paravirt_patch_template, pv_irq_ops);
OFFSET(PV_IRQ_irq_disable, pv_irq_ops, irq_disable);
diff --git a/arch/x86/kernel/asm-offsets_32.c b/arch/x86/kernel/asm-offsets_32.c
index 6ce39025f467f..ecdc1d217dc0f 100644
--- a/arch/x86/kernel/asm-offsets_32.c
+++ b/arch/x86/kernel/asm-offsets_32.c
@@ -7,7 +7,7 @@
#include
#include "../../../drivers/lguest/lg.h"
-#define __SYSCALL_I386(nr, sym, compat) [nr] = 1,
+#define __SYSCALL_I386(nr, sym, qual) [nr] = 1,
static char syscalls[] = {
#include
};
@@ -52,6 +52,11 @@ void foo(void)
DEFINE(TSS_sysenter_sp0, offsetof(struct tss_struct, x86_tss.sp0) -
offsetofend(struct tss_struct, SYSENTER_stack));
+ /* Offset from cpu_tss to SYSENTER_stack */
+ OFFSET(CPU_TSS_SYSENTER_stack, tss_struct, SYSENTER_stack);
+ /* Size of SYSENTER_stack */
+ DEFINE(SIZEOF_SYSENTER_stack, sizeof(((struct tss_struct *)0)->SYSENTER_stack));
+
#if defined(CONFIG_LGUEST) || defined(CONFIG_LGUEST_GUEST) || defined(CONFIG_LGUEST_MODULE)
BLANK();
OFFSET(LGUEST_DATA_irq_enabled, lguest_data, irq_enabled);
diff --git a/arch/x86/kernel/asm-offsets_64.c b/arch/x86/kernel/asm-offsets_64.c
index f2edafb5f24eb..d875f97d4e0ba 100644
--- a/arch/x86/kernel/asm-offsets_64.c
+++ b/arch/x86/kernel/asm-offsets_64.c
@@ -4,17 +4,11 @@
#include
-#define __SYSCALL_64(nr, sym, compat) [nr] = 1,
-#define __SYSCALL_COMMON(nr, sym, compat) [nr] = 1,
-#ifdef CONFIG_X86_X32_ABI
-# define __SYSCALL_X32(nr, sym, compat) [nr] = 1,
-#else
-# define __SYSCALL_X32(nr, sym, compat) /* nothing */
-#endif
+#define __SYSCALL_64(nr, sym, qual) [nr] = 1,
static char syscalls_64[] = {
#include
};
-#define __SYSCALL_I386(nr, sym, compat) [nr] = 1,
+#define __SYSCALL_I386(nr, sym, qual) [nr] = 1,
static char syscalls_ia32[] = {
#include
};
diff --git a/arch/x86/kernel/cpu/Makefile b/arch/x86/kernel/cpu/Makefile
index 58031303e3048..0d373d7affc8d 100644
--- a/arch/x86/kernel/cpu/Makefile
+++ b/arch/x86/kernel/cpu/Makefile
@@ -30,33 +30,11 @@ obj-$(CONFIG_CPU_SUP_CENTAUR) += centaur.o
obj-$(CONFIG_CPU_SUP_TRANSMETA_32) += transmeta.o
obj-$(CONFIG_CPU_SUP_UMC_32) += umc.o
-obj-$(CONFIG_PERF_EVENTS) += perf_event.o
-
-ifdef CONFIG_PERF_EVENTS
-obj-$(CONFIG_CPU_SUP_AMD) += perf_event_amd.o perf_event_amd_uncore.o
-ifdef CONFIG_AMD_IOMMU
-obj-$(CONFIG_CPU_SUP_AMD) += perf_event_amd_iommu.o
-endif
-obj-$(CONFIG_CPU_SUP_INTEL) += perf_event_p6.o perf_event_knc.o perf_event_p4.o
-obj-$(CONFIG_CPU_SUP_INTEL) += perf_event_intel_lbr.o perf_event_intel_ds.o perf_event_intel.o
-obj-$(CONFIG_CPU_SUP_INTEL) += perf_event_intel_rapl.o perf_event_intel_cqm.o
-obj-$(CONFIG_CPU_SUP_INTEL) += perf_event_intel_pt.o perf_event_intel_bts.o
-obj-$(CONFIG_CPU_SUP_INTEL) += perf_event_intel_cstate.o
-
-obj-$(CONFIG_PERF_EVENTS_INTEL_UNCORE) += perf_event_intel_uncore.o \
- perf_event_intel_uncore_snb.o \
- perf_event_intel_uncore_snbep.o \
- perf_event_intel_uncore_nhmex.o
-obj-$(CONFIG_CPU_SUP_INTEL) += perf_event_msr.o
-obj-$(CONFIG_CPU_SUP_AMD) += perf_event_msr.o
-endif
-
-
obj-$(CONFIG_X86_MCE) += mcheck/
obj-$(CONFIG_MTRR) += mtrr/
obj-$(CONFIG_MICROCODE) += microcode/
-obj-$(CONFIG_X86_LOCAL_APIC) += perfctr-watchdog.o perf_event_amd_ibs.o
+obj-$(CONFIG_X86_LOCAL_APIC) += perfctr-watchdog.o
obj-$(CONFIG_HYPERVISOR_GUEST) += vmware.o hypervisor.o mshyperv.o
@@ -64,7 +42,7 @@ ifdef CONFIG_X86_FEATURE_NAMES
quiet_cmd_mkcapflags = MKCAP $@
cmd_mkcapflags = $(CONFIG_SHELL) $(srctree)/$(src)/mkcapflags.sh $< $@
-cpufeature = $(src)/../../include/asm/cpufeature.h
+cpufeature = $(src)/../../include/asm/cpufeatures.h
targets += capflags.c
$(obj)/capflags.c: $(cpufeature) $(src)/mkcapflags.sh FORCE
diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c
index a07956a08936e..97c59fd60702f 100644
--- a/arch/x86/kernel/cpu/amd.c
+++ b/arch/x86/kernel/cpu/amd.c
@@ -117,7 +117,7 @@ static void init_amd_k6(struct cpuinfo_x86 *c)
void (*f_vide)(void);
u64 d, d2;
- printk(KERN_INFO "AMD K6 stepping B detected - ");
+ pr_info("AMD K6 stepping B detected - ");
/*
* It looks like AMD fixed the 2.6.2 bug and improved indirect
@@ -133,10 +133,9 @@ static void init_amd_k6(struct cpuinfo_x86 *c)
d = d2-d;
if (d > 20*K6_BUG_LOOP)
- printk(KERN_CONT
- "system stability may be impaired when more than 32 MB are used.\n");
+ pr_cont("system stability may be impaired when more than 32 MB are used.\n");
else
- printk(KERN_CONT "probably OK (after B9730xxxx).\n");
+ pr_cont("probably OK (after B9730xxxx).\n");
}
/* K6 with old style WHCR */
@@ -154,7 +153,7 @@ static void init_amd_k6(struct cpuinfo_x86 *c)
wbinvd();
wrmsr(MSR_K6_WHCR, l, h);
local_irq_restore(flags);
- printk(KERN_INFO "Enabling old style K6 write allocation for %d Mb\n",
+ pr_info("Enabling old style K6 write allocation for %d Mb\n",
mbytes);
}
return;
@@ -175,7 +174,7 @@ static void init_amd_k6(struct cpuinfo_x86 *c)
wbinvd();
wrmsr(MSR_K6_WHCR, l, h);
local_irq_restore(flags);
- printk(KERN_INFO "Enabling new style K6 write allocation for %d Mb\n",
+ pr_info("Enabling new style K6 write allocation for %d Mb\n",
mbytes);
}
@@ -202,7 +201,7 @@ static void init_amd_k7(struct cpuinfo_x86 *c)
*/
if (c->x86_model >= 6 && c->x86_model <= 10) {
if (!cpu_has(c, X86_FEATURE_XMM)) {
- printk(KERN_INFO "Enabling disabled K7/SSE Support.\n");
+ pr_info("Enabling disabled K7/SSE Support.\n");
msr_clear_bit(MSR_K7_HWCR, 15);
set_cpu_cap(c, X86_FEATURE_XMM);
}
@@ -216,9 +215,8 @@ static void init_amd_k7(struct cpuinfo_x86 *c)
if ((c->x86_model == 8 && c->x86_mask >= 1) || (c->x86_model > 8)) {
rdmsr(MSR_K7_CLK_CTL, l, h);
if ((l & 0xfff00000) != 0x20000000) {
- printk(KERN_INFO
- "CPU: CLK_CTL MSR was %x. Reprogramming to %x\n",
- l, ((l & 0x000fffff)|0x20000000));
+ pr_info("CPU: CLK_CTL MSR was %x. Reprogramming to %x\n",
+ l, ((l & 0x000fffff)|0x20000000));
wrmsr(MSR_K7_CLK_CTL, (l & 0x000fffff)|0x20000000, h);
}
}
@@ -485,7 +483,7 @@ static void bsp_init_amd(struct cpuinfo_x86 *c)
if (!rdmsrl_safe(MSR_K8_TSEG_ADDR, &tseg)) {
unsigned long pfn = tseg >> PAGE_SHIFT;
- printk(KERN_DEBUG "tseg: %010llx\n", tseg);
+ pr_debug("tseg: %010llx\n", tseg);
if (pfn_range_is_mapped(pfn, pfn + 1))
set_memory_4k((unsigned long)__va(tseg), 1);
}
@@ -500,8 +498,7 @@ static void bsp_init_amd(struct cpuinfo_x86 *c)
rdmsrl(MSR_K7_HWCR, val);
if (!(val & BIT(24)))
- printk(KERN_WARNING FW_BUG "TSC doesn't count "
- "with P0 frequency!\n");
+ pr_warn(FW_BUG "TSC doesn't count with P0 frequency!\n");
}
}
diff --git a/arch/x86/kernel/cpu/bugs_64.c b/arch/x86/kernel/cpu/bugs_64.c
index 04f0fe5af83ec..a972ac4c7e7df 100644
--- a/arch/x86/kernel/cpu/bugs_64.c
+++ b/arch/x86/kernel/cpu/bugs_64.c
@@ -15,7 +15,7 @@ void __init check_bugs(void)
{
identify_boot_cpu();
#if !defined(CONFIG_SMP)
- printk(KERN_INFO "CPU: ");
+ pr_info("CPU: ");
print_cpu_info(&boot_cpu_data);
#endif
alternative_instructions();
diff --git a/arch/x86/kernel/cpu/centaur.c b/arch/x86/kernel/cpu/centaur.c
index ae20be6e483c7..1661d8ec92805 100644
--- a/arch/x86/kernel/cpu/centaur.c
+++ b/arch/x86/kernel/cpu/centaur.c
@@ -1,7 +1,7 @@
#include
#include
-#include
+#include
#include
#include
#include
@@ -29,7 +29,7 @@ static void init_c3(struct cpuinfo_x86 *c)
rdmsr(MSR_VIA_FCR, lo, hi);
lo |= ACE_FCR; /* enable ACE unit */
wrmsr(MSR_VIA_FCR, lo, hi);
- printk(KERN_INFO "CPU: Enabled ACE h/w crypto\n");
+ pr_info("CPU: Enabled ACE h/w crypto\n");
}
/* enable RNG unit, if present and disabled */
@@ -37,7 +37,7 @@ static void init_c3(struct cpuinfo_x86 *c)
rdmsr(MSR_VIA_RNG, lo, hi);
lo |= RNG_ENABLE; /* enable RNG unit */
wrmsr(MSR_VIA_RNG, lo, hi);
- printk(KERN_INFO "CPU: Enabled h/w RNG\n");
+ pr_info("CPU: Enabled h/w RNG\n");
}
/* store Centaur Extended Feature Flags as
@@ -130,7 +130,7 @@ static void init_centaur(struct cpuinfo_x86 *c)
name = "C6";
fcr_set = ECX8|DSMC|EDCTLB|EMMX|ERETSTK;
fcr_clr = DPDC;
- printk(KERN_NOTICE "Disabling bugged TSC.\n");
+ pr_notice("Disabling bugged TSC.\n");
clear_cpu_cap(c, X86_FEATURE_TSC);
break;
case 8:
@@ -163,11 +163,11 @@ static void init_centaur(struct cpuinfo_x86 *c)
newlo = (lo|fcr_set) & (~fcr_clr);
if (newlo != lo) {
- printk(KERN_INFO "Centaur FCR was 0x%X now 0x%X\n",
+ pr_info("Centaur FCR was 0x%X now 0x%X\n",
lo, newlo);
wrmsr(MSR_IDT_FCR1, newlo, hi);
} else {
- printk(KERN_INFO "Centaur FCR is 0x%X\n", lo);
+ pr_info("Centaur FCR is 0x%X\n", lo);
}
/* Emulate MTRRs using Centaur's MCR. */
set_cpu_cap(c, X86_FEATURE_CENTAUR_MCR);
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index 37830de8f60a8..249461f958516 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -162,6 +162,22 @@ static int __init x86_mpx_setup(char *s)
}
__setup("nompx", x86_mpx_setup);
+static int __init x86_noinvpcid_setup(char *s)
+{
+ /* noinvpcid doesn't accept parameters */
+ if (s)
+ return -EINVAL;
+
+ /* do not emit a message if the feature is not present */
+ if (!boot_cpu_has(X86_FEATURE_INVPCID))
+ return 0;
+
+ setup_clear_cpu_cap(X86_FEATURE_INVPCID);
+ pr_info("noinvpcid: INVPCID feature disabled\n");
+ return 0;
+}
+early_param("noinvpcid", x86_noinvpcid_setup);
+
#ifdef CONFIG_X86_32
static int cachesize_override = -1;
static int disable_x86_serial_nr = 1;
@@ -228,7 +244,7 @@ static void squash_the_stupid_serial_number(struct cpuinfo_x86 *c)
lo |= 0x200000;
wrmsr(MSR_IA32_BBL_CR_CTL, lo, hi);
- printk(KERN_NOTICE "CPU serial number disabled.\n");
+ pr_notice("CPU serial number disabled.\n");
clear_cpu_cap(c, X86_FEATURE_PN);
/* Disabling the serial number may affect the cpuid level */
@@ -329,9 +345,8 @@ static void filter_cpuid_features(struct cpuinfo_x86 *c, bool warn)
if (!warn)
continue;
- printk(KERN_WARNING
- "CPU: CPU feature " X86_CAP_FMT " disabled, no CPUID level 0x%x\n",
- x86_cap_flag(df->feature), df->level);
+ pr_warn("CPU: CPU feature " X86_CAP_FMT " disabled, no CPUID level 0x%x\n",
+ x86_cap_flag(df->feature), df->level);
}
}
@@ -510,7 +525,7 @@ void detect_ht(struct cpuinfo_x86 *c)
smp_num_siblings = (ebx & 0xff0000) >> 16;
if (smp_num_siblings == 1) {
- printk_once(KERN_INFO "CPU0: Hyper-Threading is disabled\n");
+ pr_info_once("CPU0: Hyper-Threading is disabled\n");
goto out;
}
@@ -531,10 +546,10 @@ void detect_ht(struct cpuinfo_x86 *c)
out:
if (!printed && (c->x86_max_cores * smp_num_siblings) > 1) {
- printk(KERN_INFO "CPU: Physical Processor ID: %d\n",
- c->phys_proc_id);
- printk(KERN_INFO "CPU: Processor Core ID: %d\n",
- c->cpu_core_id);
+ pr_info("CPU: Physical Processor ID: %d\n",
+ c->phys_proc_id);
+ pr_info("CPU: Processor Core ID: %d\n",
+ c->cpu_core_id);
printed = 1;
}
#endif
@@ -559,9 +574,8 @@ static void get_cpu_vendor(struct cpuinfo_x86 *c)
}
}
- printk_once(KERN_ERR
- "CPU: vendor_id '%s' unknown, using generic init.\n" \
- "CPU: Your system may be unstable.\n", v);
+ pr_err_once("CPU: vendor_id '%s' unknown, using generic init.\n" \
+ "CPU: Your system may be unstable.\n", v);
c->x86_vendor = X86_VENDOR_UNKNOWN;
this_cpu = &default_cpu;
@@ -760,7 +774,7 @@ void __init early_cpu_init(void)
int count = 0;
#ifdef CONFIG_PROCESSOR_SELECT
- printk(KERN_INFO "KERNEL supported cpus:\n");
+ pr_info("KERNEL supported cpus:\n");
#endif
for (cdev = __x86_cpu_dev_start; cdev < __x86_cpu_dev_end; cdev++) {
@@ -778,7 +792,7 @@ void __init early_cpu_init(void)
for (j = 0; j < 2; j++) {
if (!cpudev->c_ident[j])
continue;
- printk(KERN_INFO " %s %s\n", cpudev->c_vendor,
+ pr_info(" %s %s\n", cpudev->c_vendor,
cpudev->c_ident[j]);
}
}
@@ -802,6 +816,31 @@ static void detect_nopl(struct cpuinfo_x86 *c)
clear_cpu_cap(c, X86_FEATURE_NOPL);
#else
set_cpu_cap(c, X86_FEATURE_NOPL);
+#endif
+
+ /*
+ * ESPFIX is a strange bug. All real CPUs have it. Paravirt
+ * systems that run Linux at CPL > 0 may or may not have the
+ * issue, but, even if they have the issue, there's absolutely
+ * nothing we can do about it because we can't use the real IRET
+ * instruction.
+ *
+ * NB: For the time being, only 32-bit kernels support
+ * X86_BUG_ESPFIX as such. 64-bit kernels directly choose
+ * whether to apply espfix using paravirt hooks. If any
+ * non-paravirt system ever shows up that does *not* have the
+ * ESPFIX issue, we can change this.
+ */
+#ifdef CONFIG_X86_32
+#ifdef CONFIG_PARAVIRT
+ do {
+ extern void native_iret(void);
+ if (pv_cpu_ops.iret == native_iret)
+ set_cpu_bug(c, X86_BUG_ESPFIX);
+ } while (0);
+#else
+ set_cpu_bug(c, X86_BUG_ESPFIX);
+#endif
#endif
}
@@ -977,6 +1016,8 @@ static void identify_cpu(struct cpuinfo_x86 *c)
#ifdef CONFIG_NUMA
numa_add_cpu(smp_processor_id());
#endif
+ /* The boot/hotplug time assigment got cleared, restore it */
+ c->logical_proc_id = topology_phys_to_logical_pkg(c->phys_proc_id);
}
/*
@@ -1061,7 +1102,7 @@ static void __print_cpu_msr(void)
for (index = index_min; index < index_max; index++) {
if (rdmsrl_safe(index, &val))
continue;
- printk(KERN_INFO " MSR%08x: %016llx\n", index, val);
+ pr_info(" MSR%08x: %016llx\n", index, val);
}
}
}
@@ -1100,19 +1141,19 @@ void print_cpu_info(struct cpuinfo_x86 *c)
}
if (vendor && !strstr(c->x86_model_id, vendor))
- printk(KERN_CONT "%s ", vendor);
+ pr_cont("%s ", vendor);
if (c->x86_model_id[0])
- printk(KERN_CONT "%s", c->x86_model_id);
+ pr_cont("%s", c->x86_model_id);
else
- printk(KERN_CONT "%d86", c->x86);
+ pr_cont("%d86", c->x86);
- printk(KERN_CONT " (family: 0x%x, model: 0x%x", c->x86, c->x86_model);
+ pr_cont(" (family: 0x%x, model: 0x%x", c->x86, c->x86_model);
if (c->x86_mask || c->cpuid_level >= 0)
- printk(KERN_CONT ", stepping: 0x%x)\n", c->x86_mask);
+ pr_cont(", stepping: 0x%x)\n", c->x86_mask);
else
- printk(KERN_CONT ")\n");
+ pr_cont(")\n");
print_cpu_msr(c);
}
@@ -1438,7 +1479,7 @@ void cpu_init(void)
show_ucode_info_early();
- printk(KERN_INFO "Initializing CPU#%d\n", cpu);
+ pr_info("Initializing CPU#%d\n", cpu);
if (cpu_feature_enabled(X86_FEATURE_VME) ||
cpu_has_tsc ||
@@ -1475,20 +1516,6 @@ void cpu_init(void)
}
#endif
-#ifdef CONFIG_X86_DEBUG_STATIC_CPU_HAS
-void warn_pre_alternatives(void)
-{
- WARN(1, "You're using static_cpu_has before alternatives have run!\n");
-}
-EXPORT_SYMBOL_GPL(warn_pre_alternatives);
-#endif
-
-inline bool __static_cpu_has_safe(u16 bit)
-{
- return boot_cpu_has(bit);
-}
-EXPORT_SYMBOL_GPL(__static_cpu_has_safe);
-
static void bsp_resume(void)
{
if (this_cpu->c_bsp_resume)
diff --git a/arch/x86/kernel/cpu/cyrix.c b/arch/x86/kernel/cpu/cyrix.c
index aaf152e796373..6adef9cac23ee 100644
--- a/arch/x86/kernel/cpu/cyrix.c
+++ b/arch/x86/kernel/cpu/cyrix.c
@@ -8,6 +8,7 @@
#include
#include
#include
+#include
#include "cpu.h"
@@ -103,7 +104,7 @@ static void check_cx686_slop(struct cpuinfo_x86 *c)
local_irq_restore(flags);
if (ccr5 & 2) { /* possible wrong calibration done */
- printk(KERN_INFO "Recalibrating delay loop with SLOP bit reset\n");
+ pr_info("Recalibrating delay loop with SLOP bit reset\n");
calibrate_delay();
c->loops_per_jiffy = loops_per_jiffy;
}
@@ -115,7 +116,7 @@ static void set_cx86_reorder(void)
{
u8 ccr3;
- printk(KERN_INFO "Enable Memory access reorder on Cyrix/NSC processor.\n");
+ pr_info("Enable Memory access reorder on Cyrix/NSC processor.\n");
ccr3 = getCx86(CX86_CCR3);
setCx86(CX86_CCR3, (ccr3 & 0x0f) | 0x10); /* enable MAPEN */
@@ -128,7 +129,7 @@ static void set_cx86_reorder(void)
static void set_cx86_memwb(void)
{
- printk(KERN_INFO "Enable Memory-Write-back mode on Cyrix/NSC processor.\n");
+ pr_info("Enable Memory-Write-back mode on Cyrix/NSC processor.\n");
/* CCR2 bit 2: unlock NW bit */
setCx86_old(CX86_CCR2, getCx86_old(CX86_CCR2) & ~0x04);
@@ -268,7 +269,7 @@ static void init_cyrix(struct cpuinfo_x86 *c)
* VSA1 we work around however.
*/
- printk(KERN_INFO "Working around Cyrix MediaGX virtual DMA bugs.\n");
+ pr_info("Working around Cyrix MediaGX virtual DMA bugs.\n");
isa_dma_bridge_buggy = 2;
/* We do this before the PCI layer is running. However we
@@ -426,7 +427,7 @@ static void cyrix_identify(struct cpuinfo_x86 *c)
if (dir0 == 5 || dir0 == 3) {
unsigned char ccr3;
unsigned long flags;
- printk(KERN_INFO "Enabling CPUID on Cyrix processor.\n");
+ pr_info("Enabling CPUID on Cyrix processor.\n");
local_irq_save(flags);
ccr3 = getCx86(CX86_CCR3);
/* enable MAPEN */
diff --git a/arch/x86/kernel/cpu/hypervisor.c b/arch/x86/kernel/cpu/hypervisor.c
index d820d8eae96be..73d391ae452f8 100644
--- a/arch/x86/kernel/cpu/hypervisor.c
+++ b/arch/x86/kernel/cpu/hypervisor.c
@@ -56,7 +56,7 @@ detect_hypervisor_vendor(void)
}
if (max_pri)
- printk(KERN_INFO "Hypervisor detected: %s\n", x86_hyper->name);
+ pr_info("Hypervisor detected: %s\n", x86_hyper->name);
}
void init_hypervisor(struct cpuinfo_x86 *c)
diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c
index 565648bc1a0ae..1f7fdb91a818b 100644
--- a/arch/x86/kernel/cpu/intel.c
+++ b/arch/x86/kernel/cpu/intel.c
@@ -8,7 +8,7 @@
#include
#include
-#include
+#include
#include
#include
#include
@@ -61,7 +61,7 @@ static void early_init_intel(struct cpuinfo_x86 *c)
*/
if (c->x86 == 6 && c->x86_model == 0x1c && c->x86_mask <= 2 &&
c->microcode < 0x20e) {
- printk(KERN_WARNING "Atom PSE erratum detected, BIOS microcode update recommended\n");
+ pr_warn("Atom PSE erratum detected, BIOS microcode update recommended\n");
clear_cpu_cap(c, X86_FEATURE_PSE);
}
@@ -140,7 +140,7 @@ static void early_init_intel(struct cpuinfo_x86 *c)
if (c->x86 > 6 || (c->x86 == 6 && c->x86_model >= 0xd)) {
rdmsrl(MSR_IA32_MISC_ENABLE, misc_enable);
if (!(misc_enable & MSR_IA32_MISC_ENABLE_FAST_STRING)) {
- printk(KERN_INFO "Disabled fast string operations\n");
+ pr_info("Disabled fast string operations\n");
setup_clear_cpu_cap(X86_FEATURE_REP_GOOD);
setup_clear_cpu_cap(X86_FEATURE_ERMS);
}
@@ -160,6 +160,19 @@ static void early_init_intel(struct cpuinfo_x86 *c)
pr_info("Disabling PGE capability bit\n");
setup_clear_cpu_cap(X86_FEATURE_PGE);
}
+
+ if (c->cpuid_level >= 0x00000001) {
+ u32 eax, ebx, ecx, edx;
+
+ cpuid(0x00000001, &eax, &ebx, &ecx, &edx);
+ /*
+ * If HTT (EDX[28]) is set EBX[16:23] contain the number of
+ * apicids which are reserved per package. Store the resulting
+ * shift value for the package management code.
+ */
+ if (edx & (1U << 28))
+ c->x86_coreid_bits = get_count_order((ebx >> 16) & 0xff);
+ }
}
#ifdef CONFIG_X86_32
@@ -176,7 +189,7 @@ int ppro_with_ram_bug(void)
boot_cpu_data.x86 == 6 &&
boot_cpu_data.x86_model == 1 &&
boot_cpu_data.x86_mask < 8) {
- printk(KERN_INFO "Pentium Pro with Errata#50 detected. Taking evasive action.\n");
+ pr_info("Pentium Pro with Errata#50 detected. Taking evasive action.\n");
return 1;
}
return 0;
@@ -225,7 +238,7 @@ static void intel_workarounds(struct cpuinfo_x86 *c)
set_cpu_bug(c, X86_BUG_F00F);
if (!f00f_workaround_enabled) {
- printk(KERN_NOTICE "Intel Pentium with F0 0F bug - workaround enabled.\n");
+ pr_notice("Intel Pentium with F0 0F bug - workaround enabled.\n");
f00f_workaround_enabled = 1;
}
}
@@ -244,7 +257,7 @@ static void intel_workarounds(struct cpuinfo_x86 *c)
* Forcefully enable PAE if kernel parameter "forcepae" is present.
*/
if (forcepae) {
- printk(KERN_WARNING "PAE forced!\n");
+ pr_warn("PAE forced!\n");
set_cpu_cap(c, X86_FEATURE_PAE);
add_taint(TAINT_CPU_OUT_OF_SPEC, LOCKDEP_NOW_UNRELIABLE);
}
diff --git a/arch/x86/kernel/cpu/intel_cacheinfo.c b/arch/x86/kernel/cpu/intel_cacheinfo.c
index 0b6c52388cf48..de6626c18e427 100644
--- a/arch/x86/kernel/cpu/intel_cacheinfo.c
+++ b/arch/x86/kernel/cpu/intel_cacheinfo.c
@@ -14,7 +14,7 @@
#include
#include
-#include
+#include
#include
#include
@@ -444,7 +444,7 @@ static ssize_t store_cache_disable(struct cacheinfo *this_leaf,
err = amd_set_l3_disable_slot(nb, cpu, slot, val);
if (err) {
if (err == -EEXIST)
- pr_warning("L3 slot %d in use/index already disabled!\n",
+ pr_warn("L3 slot %d in use/index already disabled!\n",
slot);
return err;
}
diff --git a/arch/x86/kernel/cpu/match.c b/arch/x86/kernel/cpu/match.c
index afa9f0d487ea0..fbb5e90557a52 100644
--- a/arch/x86/kernel/cpu/match.c
+++ b/arch/x86/kernel/cpu/match.c
@@ -1,5 +1,5 @@
#include
-#include
+#include
#include
#include
#include
diff --git a/arch/x86/kernel/cpu/mcheck/mce-inject.c b/arch/x86/kernel/cpu/mcheck/mce-inject.c
index 4cfba4371a71f..517619ea6498b 100644
--- a/arch/x86/kernel/cpu/mcheck/mce-inject.c
+++ b/arch/x86/kernel/cpu/mcheck/mce-inject.c
@@ -115,7 +115,7 @@ static int raise_local(void)
int cpu = m->extcpu;
if (m->inject_flags & MCJ_EXCEPTION) {
- printk(KERN_INFO "Triggering MCE exception on CPU %d\n", cpu);
+ pr_info("Triggering MCE exception on CPU %d\n", cpu);
switch (context) {
case MCJ_CTX_IRQ:
/*
@@ -128,15 +128,15 @@ static int raise_local(void)
raise_exception(m, NULL);
break;
default:
- printk(KERN_INFO "Invalid MCE context\n");
+ pr_info("Invalid MCE context\n");
ret = -EINVAL;
}
- printk(KERN_INFO "MCE exception done on CPU %d\n", cpu);
+ pr_info("MCE exception done on CPU %d\n", cpu);
} else if (m->status) {
- printk(KERN_INFO "Starting machine check poll CPU %d\n", cpu);
+ pr_info("Starting machine check poll CPU %d\n", cpu);
raise_poll(m);
mce_notify_irq();
- printk(KERN_INFO "Machine check poll done on CPU %d\n", cpu);
+ pr_info("Machine check poll done on CPU %d\n", cpu);
} else
m->finished = 0;
@@ -183,8 +183,7 @@ static void raise_mce(struct mce *m)
start = jiffies;
while (!cpumask_empty(mce_inject_cpumask)) {
if (!time_before(jiffies, start + 2*HZ)) {
- printk(KERN_ERR
- "Timeout waiting for mce inject %lx\n",
+ pr_err("Timeout waiting for mce inject %lx\n",
*cpumask_bits(mce_inject_cpumask));
break;
}
@@ -241,7 +240,7 @@ static int inject_init(void)
{
if (!alloc_cpumask_var(&mce_inject_cpumask, GFP_KERNEL))
return -ENOMEM;
- printk(KERN_INFO "Machine check injector initialized\n");
+ pr_info("Machine check injector initialized\n");
register_mce_write_callback(mce_write);
register_nmi_handler(NMI_LOCAL, mce_raise_notify, 0,
"mce_notify");
diff --git a/arch/x86/kernel/cpu/mcheck/mce-severity.c b/arch/x86/kernel/cpu/mcheck/mce-severity.c
index 9c682c222071d..5119766d98892 100644
--- a/arch/x86/kernel/cpu/mcheck/mce-severity.c
+++ b/arch/x86/kernel/cpu/mcheck/mce-severity.c
@@ -14,6 +14,7 @@
#include