From f42259ef810ce83f3e1a8ea4ce12dfda873fbe44 Mon Sep 17 00:00:00 2001 From: Hang Yuan <hang.yuan@linux.intel.com> Date: Wed, 19 Sep 2018 14:42:09 +0800 Subject: [PATCH 001/443] drm/i915/gvt: invalidate old ggtt page when update ggtt entry Previously only cancelled dma map of a ggtt page when the ggtt entry was cleared. This patch will cancel dma map of an old ggtt page as well when the ggtt entry is updated with new page address. Fixes: 7598e8700e9a(drm/i915/gvt: Missed to cancel dma map for ggtt entries) Signed-off-by: Hang Yuan <hang.yuan@linux.intel.com> Signed-off-by: Zhenyu Wang <zhenyuw@linux.intel.com> --- drivers/gpu/drm/i915/gvt/gtt.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/i915/gvt/gtt.c b/drivers/gpu/drm/i915/gvt/gtt.c index 00aad8164dec2..c11e353ca9043 100644 --- a/drivers/gpu/drm/i915/gvt/gtt.c +++ b/drivers/gpu/drm/i915/gvt/gtt.c @@ -2259,16 +2259,18 @@ static int emulate_ggtt_mmio_write(struct intel_vgpu *vgpu, unsigned int off, } else ops->set_pfn(&m, dma_addr >> PAGE_SHIFT); } else { - ggtt_get_host_entry(ggtt_mm, &m, g_gtt_index); - ggtt_invalidate_pte(vgpu, &m); ops->set_pfn(&m, gvt->gtt.scratch_mfn); ops->clear_present(&m); } out: + ggtt_set_guest_entry(ggtt_mm, &e, g_gtt_index); + + ggtt_get_host_entry(ggtt_mm, &e, g_gtt_index); + ggtt_invalidate_pte(vgpu, &e); + ggtt_set_host_entry(ggtt_mm, &m, g_gtt_index); ggtt_invalidate(gvt->dev_priv); - ggtt_set_guest_entry(ggtt_mm, &e, g_gtt_index); return 0; } From 202dc3cc10b4d37e5251431acf8d5040a8876c7d Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven <geert+renesas@glider.be> Date: Tue, 9 Oct 2018 19:41:58 +0200 Subject: [PATCH 002/443] serial: sh-sci: Fix receive on SCIFA/SCIFB variants with DMA On SCIFA and SCIFB serial ports with DMA support (i.e. some ports on R-Car Gen2 and RZ/G1 SoCs), receive DMA operations are submitted before the DMA channel pointer is initialized. Hence this fails, and the driver tries to fall back to PIO. However, at this early phase in the initialization sequence, fallback to PIO does not work, leading to a serial port that cannot receive any data. Fix this by calling sci_submit_rx() after initialization of the DMA channel pointer. Reported-by: Yoshihiro Shimoda <yoshihiro.shimoda.uh@renesas.com> Fixes: 2c4ee23530ffc022 ("serial: sh-sci: Postpone DMA release when falling back to PIO") Signed-off-by: Geert Uytterhoeven <geert+renesas@glider.be> Reviewed-by: Wolfram Sang <wsa+renesas@sang-engineering.com> Tested-by: Wolfram Sang <wsa+renesas@sang-engineering.com> Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> --- drivers/tty/serial/sh-sci.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/tty/serial/sh-sci.c b/drivers/tty/serial/sh-sci.c index ab3f6e91853da..e19bfbba8a019 100644 --- a/drivers/tty/serial/sh-sci.c +++ b/drivers/tty/serial/sh-sci.c @@ -1614,10 +1614,10 @@ static void sci_request_dma(struct uart_port *port) hrtimer_init(&s->rx_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); s->rx_timer.function = rx_timer_fn; + s->chan_rx_saved = s->chan_rx = chan; + if (port->type == PORT_SCIFA || port->type == PORT_SCIFB) sci_submit_rx(s); - - s->chan_rx_saved = s->chan_rx = chan; } } From 61792b677415b77c8db04991c22966bb8de7603e Mon Sep 17 00:00:00 2001 From: Florian Westphal <fw@strlen.de> Date: Tue, 23 Oct 2018 16:47:16 +0200 Subject: [PATCH 003/443] netfilter: ipv6: fix oops when defragmenting locally generated fragments MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Unlike ipv4 and normal ipv6 defrag, netfilter ipv6 defragmentation did not save/restore skb->dst. This causes oops when handling locally generated ipv6 fragments, as output path needs a valid dst. Reported-by: Maciej Żenczykowski <zenczykowski@gmail.com> Fixes: 84379c9afe01 ("netfilter: ipv6: nf_defrag: drop skb dst before queueing") Signed-off-by: Florian Westphal <fw@strlen.de> Reviewed-by: Eric Dumazet <edumazet@google.com> Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org> --- net/ipv6/netfilter/nf_conntrack_reasm.c | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/net/ipv6/netfilter/nf_conntrack_reasm.c b/net/ipv6/netfilter/nf_conntrack_reasm.c index b8ac369f98ad8..d219979c3e529 100644 --- a/net/ipv6/netfilter/nf_conntrack_reasm.c +++ b/net/ipv6/netfilter/nf_conntrack_reasm.c @@ -587,11 +587,16 @@ int nf_ct_frag6_gather(struct net *net, struct sk_buff *skb, u32 user) */ ret = -EINPROGRESS; if (fq->q.flags == (INET_FRAG_FIRST_IN | INET_FRAG_LAST_IN) && - fq->q.meat == fq->q.len && - nf_ct_frag6_reasm(fq, skb, dev)) - ret = 0; - else + fq->q.meat == fq->q.len) { + unsigned long orefdst = skb->_skb_refdst; + + skb->_skb_refdst = 0UL; + if (nf_ct_frag6_reasm(fq, skb, dev)) + ret = 0; + skb->_skb_refdst = orefdst; + } else { skb_dst_drop(skb); + } out_unlock: spin_unlock_bh(&fq->q.lock); From 5e91c9d9cd3fd557226ca75fed58816b9eee7e07 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso <pablo@netfilter.org> Date: Mon, 22 Oct 2018 21:49:45 +0200 Subject: [PATCH 004/443] netfilter: nft_osf: check if attribute is present If the attribute is not sent, eg. old libnftnl binary, then tb[NFTA_OSF_TTL] is NULL and kernel crashes from the _init path. Fixes: a218dc82f0b5 ("netfilter: nft_osf: Add ttl option support") Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org> --- net/netfilter/nft_osf.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/netfilter/nft_osf.c b/net/netfilter/nft_osf.c index ca5e5d8c5ef8b..b13618c764ec2 100644 --- a/net/netfilter/nft_osf.c +++ b/net/netfilter/nft_osf.c @@ -50,7 +50,7 @@ static int nft_osf_init(const struct nft_ctx *ctx, int err; u8 ttl; - if (nla_get_u8(tb[NFTA_OSF_TTL])) { + if (tb[NFTA_OSF_TTL]) { ttl = nla_get_u8(tb[NFTA_OSF_TTL]); if (ttl > 2) return -EINVAL; From 5a8de47b3c250521dd632cdedaac6db88367defa Mon Sep 17 00:00:00 2001 From: Jiri Slaby <jslaby@suse.cz> Date: Wed, 24 Oct 2018 13:54:03 +0200 Subject: [PATCH 005/443] netfilter: bridge: define INT_MIN & INT_MAX in userspace MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit With 4.19, programs like ebtables fail to build when they include "linux/netfilter_bridge.h". It is caused by commit 94276fa8a2a4 which added a use of INT_MIN and INT_MAX to the header: : In file included from /usr/include/linux/netfilter_bridge/ebtables.h:18, : from include/ebtables_u.h:28, : from communication.c:23: : /usr/include/linux/netfilter_bridge.h:30:20: error: 'INT_MIN' undeclared here (not in a function) : NF_BR_PRI_FIRST = INT_MIN, : ^~~~~~~ Define these constants by including "limits.h" when !__KERNEL__ (the same way as for other netfilter_* headers). Fixes: 94276fa8a2a4 ("netfilter: bridge: Expose nf_tables bridge hook priorities through uapi") Signed-off-by: Jiri Slaby <jslaby@suse.cz> Acked-by: Máté Eckl <ecklm94@gmail.com> Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org> --- include/uapi/linux/netfilter_bridge.h | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/include/uapi/linux/netfilter_bridge.h b/include/uapi/linux/netfilter_bridge.h index 156ccd089df18..1610fdbab98df 100644 --- a/include/uapi/linux/netfilter_bridge.h +++ b/include/uapi/linux/netfilter_bridge.h @@ -11,6 +11,10 @@ #include <linux/if_vlan.h> #include <linux/if_pppox.h> +#ifndef __KERNEL__ +#include <limits.h> /* for INT_MIN, INT_MAX */ +#endif + /* Bridge Hooks */ /* After promisc drops, checksum checks. */ #define NF_BR_PRE_ROUTING 0 From 1b50bb4d36e89fd54c14722c4ab5266ef17767ff Mon Sep 17 00:00:00 2001 From: Pavel Machek <pavel@ucw.cz> Date: Wed, 24 Oct 2018 22:44:39 +0200 Subject: [PATCH 006/443] Fix pattern handling optimalization Check for zero duration before skipping step. This fixes pattern echo "0 1000 10 2550 0 1000" > pattern which should do [ .-xXx-.] but does [ Xx-.] Signed-off-by: Pavel Machek <pavel@ucw.cz> Suggested-by: Jacek Anaszewski <jacek.anaszewski@gmail.com> Signed-off-by: Jacek Anaszewski <jacek.anaszewski@gmail.com> --- drivers/leds/trigger/ledtrig-pattern.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/leds/trigger/ledtrig-pattern.c b/drivers/leds/trigger/ledtrig-pattern.c index ce7acd115dd8d..174a298f1be02 100644 --- a/drivers/leds/trigger/ledtrig-pattern.c +++ b/drivers/leds/trigger/ledtrig-pattern.c @@ -87,9 +87,10 @@ static void pattern_trig_timer_function(struct timer_list *t) data->curr->brightness); mod_timer(&data->timer, jiffies + msecs_to_jiffies(data->curr->delta_t)); - - /* Skip the tuple with zero duration */ - pattern_trig_update_patterns(data); + if (!data->next->delta_t) { + /* Skip the tuple with zero duration */ + pattern_trig_update_patterns(data); + } /* Select next tuple */ pattern_trig_update_patterns(data); } else { From ef5febae1543f35a45f01614123e829d77326d0f Mon Sep 17 00:00:00 2001 From: Vasily Gorbik <gor@linux.ibm.com> Date: Fri, 19 Oct 2018 13:37:46 +0200 Subject: [PATCH 007/443] s390/decompressor: add missing FORCE to build targets According to Documentation/kbuild/makefiles.txt all build targets using if_changed should use FORCE as well. Add missing FORCE to make sure vmlinux decompressor targets are rebuild properly when not just immediate prerequisites have changed but also when build command differs. Reviewed-by: Philipp Rudo <prudo@linux.ibm.com> Signed-off-by: Vasily Gorbik <gor@linux.ibm.com> Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com> --- arch/s390/boot/compressed/Makefile | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/arch/s390/boot/compressed/Makefile b/arch/s390/boot/compressed/Makefile index 593039620487a..92b5487b0556d 100644 --- a/arch/s390/boot/compressed/Makefile +++ b/arch/s390/boot/compressed/Makefile @@ -22,7 +22,7 @@ OBJCOPYFLAGS := OBJECTS := $(addprefix $(obj)/,$(obj-y)) LDFLAGS_vmlinux := --oformat $(LD_BFD) -e startup -T -$(obj)/vmlinux: $(obj)/vmlinux.lds $(objtree)/arch/s390/boot/startup.a $(OBJECTS) +$(obj)/vmlinux: $(obj)/vmlinux.lds $(objtree)/arch/s390/boot/startup.a $(OBJECTS) FORCE $(call if_changed,ld) OBJCOPYFLAGS_info.bin := -O binary --only-section=.vmlinux.info @@ -46,17 +46,17 @@ suffix-$(CONFIG_KERNEL_LZMA) := .lzma suffix-$(CONFIG_KERNEL_LZO) := .lzo suffix-$(CONFIG_KERNEL_XZ) := .xz -$(obj)/vmlinux.bin.gz: $(vmlinux.bin.all-y) +$(obj)/vmlinux.bin.gz: $(vmlinux.bin.all-y) FORCE $(call if_changed,gzip) -$(obj)/vmlinux.bin.bz2: $(vmlinux.bin.all-y) +$(obj)/vmlinux.bin.bz2: $(vmlinux.bin.all-y) FORCE $(call if_changed,bzip2) -$(obj)/vmlinux.bin.lz4: $(vmlinux.bin.all-y) +$(obj)/vmlinux.bin.lz4: $(vmlinux.bin.all-y) FORCE $(call if_changed,lz4) -$(obj)/vmlinux.bin.lzma: $(vmlinux.bin.all-y) +$(obj)/vmlinux.bin.lzma: $(vmlinux.bin.all-y) FORCE $(call if_changed,lzma) -$(obj)/vmlinux.bin.lzo: $(vmlinux.bin.all-y) +$(obj)/vmlinux.bin.lzo: $(vmlinux.bin.all-y) FORCE $(call if_changed,lzo) -$(obj)/vmlinux.bin.xz: $(vmlinux.bin.all-y) +$(obj)/vmlinux.bin.xz: $(vmlinux.bin.all-y) FORCE $(call if_changed,xzkern) OBJCOPYFLAGS_piggy.o := -I binary -O elf64-s390 -B s390:64-bit --rename-section .data=.vmlinux.bin.compressed From b44b136a3773d8a9c7853f8df716bd1483613cbb Mon Sep 17 00:00:00 2001 From: Vasily Gorbik <gor@linux.ibm.com> Date: Fri, 19 Oct 2018 15:37:01 +0200 Subject: [PATCH 008/443] s390/vdso: add missing FORCE to build targets According to Documentation/kbuild/makefiles.txt all build targets using if_changed should use FORCE as well. Add missing FORCE to make sure vdso targets are rebuild properly when not just immediate prerequisites have changed but also when build command differs. Reviewed-by: Philipp Rudo <prudo@linux.ibm.com> Signed-off-by: Vasily Gorbik <gor@linux.ibm.com> Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com> --- arch/s390/kernel/vdso32/Makefile | 6 +++--- arch/s390/kernel/vdso64/Makefile | 6 +++--- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/arch/s390/kernel/vdso32/Makefile b/arch/s390/kernel/vdso32/Makefile index eb8aebea3ea7b..e76309fbbcb3b 100644 --- a/arch/s390/kernel/vdso32/Makefile +++ b/arch/s390/kernel/vdso32/Makefile @@ -37,7 +37,7 @@ KASAN_SANITIZE := n $(obj)/vdso32_wrapper.o : $(obj)/vdso32.so # link rule for the .so file, .lds has to be first -$(obj)/vdso32.so.dbg: $(src)/vdso32.lds $(obj-vdso32) +$(obj)/vdso32.so.dbg: $(src)/vdso32.lds $(obj-vdso32) FORCE $(call if_changed,vdso32ld) # strip rule for the .so file @@ -46,12 +46,12 @@ $(obj)/%.so: $(obj)/%.so.dbg FORCE $(call if_changed,objcopy) # assembly rules for the .S files -$(obj-vdso32): %.o: %.S +$(obj-vdso32): %.o: %.S FORCE $(call if_changed_dep,vdso32as) # actual build commands quiet_cmd_vdso32ld = VDSO32L $@ - cmd_vdso32ld = $(CC) $(c_flags) -Wl,-T $^ -o $@ + cmd_vdso32ld = $(CC) $(c_flags) -Wl,-T $(filter %.lds %.o,$^) -o $@ quiet_cmd_vdso32as = VDSO32A $@ cmd_vdso32as = $(CC) $(a_flags) -c -o $@ $< diff --git a/arch/s390/kernel/vdso64/Makefile b/arch/s390/kernel/vdso64/Makefile index a22b2cf86eec9..f849ac61c5da0 100644 --- a/arch/s390/kernel/vdso64/Makefile +++ b/arch/s390/kernel/vdso64/Makefile @@ -37,7 +37,7 @@ KASAN_SANITIZE := n $(obj)/vdso64_wrapper.o : $(obj)/vdso64.so # link rule for the .so file, .lds has to be first -$(obj)/vdso64.so.dbg: $(src)/vdso64.lds $(obj-vdso64) +$(obj)/vdso64.so.dbg: $(src)/vdso64.lds $(obj-vdso64) FORCE $(call if_changed,vdso64ld) # strip rule for the .so file @@ -46,12 +46,12 @@ $(obj)/%.so: $(obj)/%.so.dbg FORCE $(call if_changed,objcopy) # assembly rules for the .S files -$(obj-vdso64): %.o: %.S +$(obj-vdso64): %.o: %.S FORCE $(call if_changed_dep,vdso64as) # actual build commands quiet_cmd_vdso64ld = VDSO64L $@ - cmd_vdso64ld = $(CC) $(c_flags) -Wl,-T $^ -o $@ + cmd_vdso64ld = $(CC) $(c_flags) -Wl,-T $(filter %.lds %.o,$^) -o $@ quiet_cmd_vdso64as = VDSO64A $@ cmd_vdso64as = $(CC) $(a_flags) -c -o $@ $< From 5a2e1853d68904c4b26706dba2884cbeb77bc3ee Mon Sep 17 00:00:00 2001 From: Vasily Gorbik <gor@linux.ibm.com> Date: Wed, 17 Oct 2018 13:59:46 +0200 Subject: [PATCH 009/443] s390: avoid vmlinux segments overlap Currently .vmlinux.info section of uncompressed vmlinux elf image is included into the data segment and load address specified as 0. That extends data segment to address 0 and makes "text" and "data" segments overlap. Program Headers: Type Offset VirtAddr PhysAddr FileSiz MemSiz Flags Align LOAD 0x0000000000001000 0x0000000000100000 0x0000000000100000 0x0000000000ead03c 0x0000000000ead03c R E 0x1000 LOAD 0x0000000000eaf000 0x0000000000000000 0x0000000000000000 0x0000000001a13400 0x000000000233b520 RWE 0x1000 NOTE 0x0000000000eae000 0x0000000000fad000 0x0000000000fad000 0x000000000000003c 0x000000000000003c 0x4 Section to Segment mapping: Segment Sections... 00 .text .notes 01 .rodata __ksymtab __ksymtab_gpl __ksymtab_strings __param __modver .data..ro_after_init __ex_table .data __bug_table .init.text .exit.text .exit.data .altinstructions .altinstr_replacement .nospec_call_table .nospec_return_table .boot.data .init.data .data..percpu .bss .vmlinux.info 02 .notes Later when vmlinux.bin is produced from vmlinux, .vmlinux.info section is removed. But elf vmlinux file, even though it is not bootable anymore, used for debugging and loadable segments overlap should be avoided. Utilize special ":NONE" phdr specification to avoid adding .vmlinux.info into loadable data segment. Also set .vmlinux.info section type to INFO, which allows to get a not-loadable info CONTENTS section. Since minimal supported version of binutils 2.20 does not have --dump-section objcopy option, make .vmlinux.info section loadable during info.bin creation to get actual section contents. Reported-by: Philipp Rudo <prudo@linux.ibm.com> Signed-off-by: Vasily Gorbik <gor@linux.ibm.com> Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com> --- arch/s390/boot/compressed/Makefile | 2 +- arch/s390/kernel/vmlinux.lds.S | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/s390/boot/compressed/Makefile b/arch/s390/boot/compressed/Makefile index 92b5487b0556d..b1bdd15e3429f 100644 --- a/arch/s390/boot/compressed/Makefile +++ b/arch/s390/boot/compressed/Makefile @@ -25,7 +25,7 @@ LDFLAGS_vmlinux := --oformat $(LD_BFD) -e startup -T $(obj)/vmlinux: $(obj)/vmlinux.lds $(objtree)/arch/s390/boot/startup.a $(OBJECTS) FORCE $(call if_changed,ld) -OBJCOPYFLAGS_info.bin := -O binary --only-section=.vmlinux.info +OBJCOPYFLAGS_info.bin := -O binary --only-section=.vmlinux.info --set-section-flags .vmlinux.info=load $(obj)/info.bin: vmlinux FORCE $(call if_changed,objcopy) diff --git a/arch/s390/kernel/vmlinux.lds.S b/arch/s390/kernel/vmlinux.lds.S index 21eb7407d51ba..8429ab0797157 100644 --- a/arch/s390/kernel/vmlinux.lds.S +++ b/arch/s390/kernel/vmlinux.lds.S @@ -154,14 +154,14 @@ SECTIONS * uncompressed image info used by the decompressor * it should match struct vmlinux_info */ - .vmlinux.info 0 : { + .vmlinux.info 0 (INFO) : { QUAD(_stext) /* default_lma */ QUAD(startup_continue) /* entry */ QUAD(__bss_start - _stext) /* image_size */ QUAD(__bss_stop - __bss_start) /* bss_size */ QUAD(__boot_data_start) /* bootdata_off */ QUAD(__boot_data_end - __boot_data_start) /* bootdata_size */ - } + } :NONE /* Debugging sections. */ STABS_DEBUG From f11274396a538b31bc010f782e05c2ce3f804c13 Mon Sep 17 00:00:00 2001 From: Breno Leitao <leitao@debian.org> Date: Fri, 19 Oct 2018 17:01:33 -0300 Subject: [PATCH 010/443] HID: hiddev: fix potential Spectre v1 uref->usage_index can be indirectly controlled by userspace, hence leading to a potential exploitation of the Spectre variant 1 vulnerability. This field is used as an array index by the hiddev_ioctl_usage() function, when 'cmd' is either HIDIOCGCOLLECTIONINDEX, HIDIOCGUSAGES or HIDIOCSUSAGES. For cmd == HIDIOCGCOLLECTIONINDEX case, uref->usage_index is compared to field->maxusage and then used as an index to dereference field->usage array. The same thing happens to the cmd == HIDIOC{G,S}USAGES cases, where uref->usage_index is checked against an array maximum value and then it is used as an index in an array. This is a summary of the HIDIOCGCOLLECTIONINDEX case, which matches the traditional Spectre V1 first load: copy_from_user(uref, user_arg, sizeof(*uref)) if (uref->usage_index >= field->maxusage) goto inval; i = field->usage[uref->usage_index].collection_index; return i; This patch fixes this by sanitizing field uref->usage_index before using it to index field->usage (HIDIOCGCOLLECTIONINDEX) or field->value in HIDIOC{G,S}USAGES arrays, thus, avoiding speculation in the first load. Cc: <stable@vger.kernel.org> Signed-off-by: Breno Leitao <leitao@debian.org> -- v2: Contemplate cmd == HIDIOC{G,S}USAGES case Signed-off-by: Jiri Kosina <jkosina@suse.cz> --- drivers/hid/usbhid/hiddev.c | 18 ++++++++++++++---- 1 file changed, 14 insertions(+), 4 deletions(-) diff --git a/drivers/hid/usbhid/hiddev.c b/drivers/hid/usbhid/hiddev.c index 23872d08308cd..a746017fac170 100644 --- a/drivers/hid/usbhid/hiddev.c +++ b/drivers/hid/usbhid/hiddev.c @@ -512,14 +512,24 @@ static noinline int hiddev_ioctl_usage(struct hiddev *hiddev, unsigned int cmd, if (cmd == HIDIOCGCOLLECTIONINDEX) { if (uref->usage_index >= field->maxusage) goto inval; + uref->usage_index = + array_index_nospec(uref->usage_index, + field->maxusage); } else if (uref->usage_index >= field->report_count) goto inval; } - if ((cmd == HIDIOCGUSAGES || cmd == HIDIOCSUSAGES) && - (uref_multi->num_values > HID_MAX_MULTI_USAGES || - uref->usage_index + uref_multi->num_values > field->report_count)) - goto inval; + if (cmd == HIDIOCGUSAGES || cmd == HIDIOCSUSAGES) { + if (uref_multi->num_values > HID_MAX_MULTI_USAGES || + uref->usage_index + uref_multi->num_values > + field->report_count) + goto inval; + + uref->usage_index = + array_index_nospec(uref->usage_index, + field->report_count - + uref_multi->num_values); + } switch (cmd) { case HIDIOCGUSAGE: From 00b790ea545b6ef30221adef6e9c3707e03b82b5 Mon Sep 17 00:00:00 2001 From: Kai-Heng Feng <kai.heng.feng@canonical.com> Date: Fri, 5 Oct 2018 12:46:29 +0800 Subject: [PATCH 011/443] HID: i2c-hid: Add a small delay after sleep command for Raydium touchpanel Raydium touchpanel (2386:4B33) sometimes does not work in desktop session although it works in display manager. During user logging, the display manager exits, close the HID device, then the device gets runtime suspended and powered off. The desktop session begins shortly after, opens the HID device, then the device gets runtime resumed and powered on. If the trasition from display manager to desktop sesesion is fast, the touchpanel cannot switch from powered off to powered on in short timeframe. So add a small delay to workaround the issue. Signed-off-by: Kai-Heng Feng <kai.heng.feng@canonical.com> Reviewed-by: Benjamin Tissoires <benjamin.tissoires@redhat.com> Signed-off-by: Jiri Kosina <jkosina@suse.cz> --- drivers/hid/hid-ids.h | 3 +++ drivers/hid/i2c-hid/i2c-hid-core.c | 19 +++++++++++++++++++ 2 files changed, 22 insertions(+) diff --git a/drivers/hid/hid-ids.h b/drivers/hid/hid-ids.h index f63489c882bb6..c0d668944dbe8 100644 --- a/drivers/hid/hid-ids.h +++ b/drivers/hid/hid-ids.h @@ -927,6 +927,9 @@ #define USB_DEVICE_ID_QUANTA_OPTICAL_TOUCH_3003 0x3003 #define USB_DEVICE_ID_QUANTA_OPTICAL_TOUCH_3008 0x3008 +#define I2C_VENDOR_ID_RAYDIUM 0x2386 +#define I2C_PRODUCT_ID_RAYDIUM_4B33 0x4b33 + #define USB_VENDOR_ID_RAZER 0x1532 #define USB_DEVICE_ID_RAZER_BLADE_14 0x011D diff --git a/drivers/hid/i2c-hid/i2c-hid-core.c b/drivers/hid/i2c-hid/i2c-hid-core.c index 4aab96cf08186..3cde7c1b9c33c 100644 --- a/drivers/hid/i2c-hid/i2c-hid-core.c +++ b/drivers/hid/i2c-hid/i2c-hid-core.c @@ -49,6 +49,7 @@ #define I2C_HID_QUIRK_SET_PWR_WAKEUP_DEV BIT(0) #define I2C_HID_QUIRK_NO_IRQ_AFTER_RESET BIT(1) #define I2C_HID_QUIRK_NO_RUNTIME_PM BIT(2) +#define I2C_HID_QUIRK_DELAY_AFTER_SLEEP BIT(3) /* flags */ #define I2C_HID_STARTED 0 @@ -158,6 +159,8 @@ struct i2c_hid { bool irq_wake_enabled; struct mutex reset_lock; + + unsigned long sleep_delay; }; static const struct i2c_hid_quirks { @@ -172,6 +175,8 @@ static const struct i2c_hid_quirks { { I2C_VENDOR_ID_HANTICK, I2C_PRODUCT_ID_HANTICK_5288, I2C_HID_QUIRK_NO_IRQ_AFTER_RESET | I2C_HID_QUIRK_NO_RUNTIME_PM }, + { I2C_VENDOR_ID_RAYDIUM, I2C_PRODUCT_ID_RAYDIUM_4B33, + I2C_HID_QUIRK_DELAY_AFTER_SLEEP }, { 0, 0 } }; @@ -387,6 +392,7 @@ static int i2c_hid_set_power(struct i2c_client *client, int power_state) { struct i2c_hid *ihid = i2c_get_clientdata(client); int ret; + unsigned long now, delay; i2c_hid_dbg(ihid, "%s\n", __func__); @@ -404,9 +410,22 @@ static int i2c_hid_set_power(struct i2c_client *client, int power_state) goto set_pwr_exit; } + if (ihid->quirks & I2C_HID_QUIRK_DELAY_AFTER_SLEEP && + power_state == I2C_HID_PWR_ON) { + now = jiffies; + if (time_after(ihid->sleep_delay, now)) { + delay = jiffies_to_usecs(ihid->sleep_delay - now); + usleep_range(delay, delay + 1); + } + } + ret = __i2c_hid_command(client, &hid_set_power_cmd, power_state, 0, NULL, 0, NULL, 0); + if (ihid->quirks & I2C_HID_QUIRK_DELAY_AFTER_SLEEP && + power_state == I2C_HID_PWR_SLEEP) + ihid->sleep_delay = jiffies + msecs_to_jiffies(20); + if (ret) dev_err(&client->dev, "failed to change power setting.\n"); From 6298944d8f57f40ee2a3e6dcea1253e78d7a9969 Mon Sep 17 00:00:00 2001 From: Benjamin Tissoires <benjamin.tissoires@redhat.com> Date: Wed, 17 Oct 2018 09:01:53 +0200 Subject: [PATCH 012/443] Revert "HID: add NOGET quirk for Eaton Ellipse MAX UPS" This reverts commit 67ddbb3e6568fb1820b2cc45b00c50702b114801. 67ddbb3e656 ("HID: add NOGET quirk for Eaton Ellipse MAX UPS") was reported by Laurent Bigonville. It turns out that a later model Laurent got doesn't need the quirk after all. My take is that Eaton upgraded their firmwares, so we don't need it anymore. The old model was from 2012, so better make sure the new line works properly by removing the quirk. This allows upower to actually fetch the current data. Reported-by: Laurent Bigonville <bigon@bigon.be> Signed-off-by: Benjamin Tissoires <benjamin.tissoires@redhat.com> Signed-off-by: Jiri Kosina <jkosina@suse.cz> --- drivers/hid/hid-quirks.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/hid/hid-quirks.c b/drivers/hid/hid-quirks.c index 52c3b01917e72..8237dd86fb17f 100644 --- a/drivers/hid/hid-quirks.c +++ b/drivers/hid/hid-quirks.c @@ -107,7 +107,6 @@ static const struct hid_device_id hid_quirks[] = { { HID_USB_DEVICE(USB_VENDOR_ID_LOGITECH, USB_DEVICE_ID_LOGITECH_MOUSE_C05A), HID_QUIRK_ALWAYS_POLL }, { HID_USB_DEVICE(USB_VENDOR_ID_LOGITECH, USB_DEVICE_ID_LOGITECH_MOUSE_C06A), HID_QUIRK_ALWAYS_POLL }, { HID_USB_DEVICE(USB_VENDOR_ID_MCS, USB_DEVICE_ID_MCS_GAMEPADBLOCK), HID_QUIRK_MULTI_INPUT }, - { HID_USB_DEVICE(USB_VENDOR_ID_MGE, USB_DEVICE_ID_MGE_UPS), HID_QUIRK_NOGET }, { HID_USB_DEVICE(USB_VENDOR_ID_MICROSOFT, USB_DEVICE_ID_MS_POWER_COVER), HID_QUIRK_NO_INIT_REPORTS }, { HID_USB_DEVICE(USB_VENDOR_ID_MICROSOFT, USB_DEVICE_ID_MS_SURFACE_PRO_2), HID_QUIRK_NO_INIT_REPORTS }, { HID_USB_DEVICE(USB_VENDOR_ID_MICROSOFT, USB_DEVICE_ID_MS_TOUCH_COVER_2), HID_QUIRK_NO_INIT_REPORTS }, From 7dd8db68949a7acc5bd528ee0ecb8f8720f49921 Mon Sep 17 00:00:00 2001 From: Benjamin Tissoires <benjamin.tissoires@redhat.com> Date: Fri, 12 Oct 2018 16:05:25 +0200 Subject: [PATCH 013/443] HID: alps: allow incoming reports when only the trackstick is opened If userspace only reads the trackstick node, and no one is listening to the touchpad nor the hidraw node then, the device is not powered on. Add open/close callbacks to allow users to disable the touchpad in Gnome while keeping the trackstick active. Link: https://bugzilla.redhat.com/show_bug.cgi?id=1559632 Link: https://gitlab.gnome.org/GNOME/mutter/issues/128 Signed-off-by: Benjamin Tissoires <benjamin.tissoires@redhat.com> Signed-off-by: Jiri Kosina <jkosina@suse.cz> --- drivers/hid/hid-alps.c | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/drivers/hid/hid-alps.c b/drivers/hid/hid-alps.c index aec253b441568..3cd7229b6e546 100644 --- a/drivers/hid/hid-alps.c +++ b/drivers/hid/hid-alps.c @@ -660,6 +660,20 @@ static int T4_init(struct hid_device *hdev, struct alps_dev *pri_data) return ret; } +static int alps_sp_open(struct input_dev *dev) +{ + struct hid_device *hid = input_get_drvdata(dev); + + return hid_hw_open(hid); +} + +static void alps_sp_close(struct input_dev *dev) +{ + struct hid_device *hid = input_get_drvdata(dev); + + hid_hw_close(hid); +} + static int alps_input_configured(struct hid_device *hdev, struct hid_input *hi) { struct alps_dev *data = hid_get_drvdata(hdev); @@ -733,6 +747,10 @@ static int alps_input_configured(struct hid_device *hdev, struct hid_input *hi) input2->id.version = input->id.version; input2->dev.parent = input->dev.parent; + input_set_drvdata(input2, hdev); + input2->open = alps_sp_open; + input2->close = alps_sp_close; + __set_bit(EV_KEY, input2->evbit); data->sp_btn_cnt = (data->sp_btn_info & 0x0F); for (i = 0; i < data->sp_btn_cnt; i++) From 4269fea768a11a447d8de620ce420f2214d4685c Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso <pablo@netfilter.org> Date: Fri, 26 Oct 2018 11:14:28 +0200 Subject: [PATCH 014/443] Revert "netfilter: nft_numgen: add map lookups for numgen random operations" Laura found a better way to do this from userspace without requiring kernel infrastructure, revert this. Fixes: 978d8f9055c3 ("netfilter: nft_numgen: add map lookups for numgen random operations") Signed-off-by: Laura Garcia Liebana <nevola@gmail.com> Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org> --- include/uapi/linux/netfilter/nf_tables.h | 4 +- net/netfilter/nft_numgen.c | 127 ----------------------- 2 files changed, 2 insertions(+), 129 deletions(-) diff --git a/include/uapi/linux/netfilter/nf_tables.h b/include/uapi/linux/netfilter/nf_tables.h index 579974b0bf0d8..7de4f1bdaf06a 100644 --- a/include/uapi/linux/netfilter/nf_tables.h +++ b/include/uapi/linux/netfilter/nf_tables.h @@ -1635,8 +1635,8 @@ enum nft_ng_attributes { NFTA_NG_MODULUS, NFTA_NG_TYPE, NFTA_NG_OFFSET, - NFTA_NG_SET_NAME, - NFTA_NG_SET_ID, + NFTA_NG_SET_NAME, /* deprecated */ + NFTA_NG_SET_ID, /* deprecated */ __NFTA_NG_MAX }; #define NFTA_NG_MAX (__NFTA_NG_MAX - 1) diff --git a/net/netfilter/nft_numgen.c b/net/netfilter/nft_numgen.c index 649d1700ec5ba..3cc1b3dc3c3cd 100644 --- a/net/netfilter/nft_numgen.c +++ b/net/netfilter/nft_numgen.c @@ -24,7 +24,6 @@ struct nft_ng_inc { u32 modulus; atomic_t counter; u32 offset; - struct nft_set *map; }; static u32 nft_ng_inc_gen(struct nft_ng_inc *priv) @@ -48,34 +47,11 @@ static void nft_ng_inc_eval(const struct nft_expr *expr, regs->data[priv->dreg] = nft_ng_inc_gen(priv); } -static void nft_ng_inc_map_eval(const struct nft_expr *expr, - struct nft_regs *regs, - const struct nft_pktinfo *pkt) -{ - struct nft_ng_inc *priv = nft_expr_priv(expr); - const struct nft_set *map = priv->map; - const struct nft_set_ext *ext; - u32 result; - bool found; - - result = nft_ng_inc_gen(priv); - found = map->ops->lookup(nft_net(pkt), map, &result, &ext); - - if (!found) - return; - - nft_data_copy(®s->data[priv->dreg], - nft_set_ext_data(ext), map->dlen); -} - static const struct nla_policy nft_ng_policy[NFTA_NG_MAX + 1] = { [NFTA_NG_DREG] = { .type = NLA_U32 }, [NFTA_NG_MODULUS] = { .type = NLA_U32 }, [NFTA_NG_TYPE] = { .type = NLA_U32 }, [NFTA_NG_OFFSET] = { .type = NLA_U32 }, - [NFTA_NG_SET_NAME] = { .type = NLA_STRING, - .len = NFT_SET_MAXNAMELEN - 1 }, - [NFTA_NG_SET_ID] = { .type = NLA_U32 }, }; static int nft_ng_inc_init(const struct nft_ctx *ctx, @@ -101,22 +77,6 @@ static int nft_ng_inc_init(const struct nft_ctx *ctx, NFT_DATA_VALUE, sizeof(u32)); } -static int nft_ng_inc_map_init(const struct nft_ctx *ctx, - const struct nft_expr *expr, - const struct nlattr * const tb[]) -{ - struct nft_ng_inc *priv = nft_expr_priv(expr); - u8 genmask = nft_genmask_next(ctx->net); - - nft_ng_inc_init(ctx, expr, tb); - - priv->map = nft_set_lookup_global(ctx->net, ctx->table, - tb[NFTA_NG_SET_NAME], - tb[NFTA_NG_SET_ID], genmask); - - return PTR_ERR_OR_ZERO(priv->map); -} - static int nft_ng_dump(struct sk_buff *skb, enum nft_registers dreg, u32 modulus, enum nft_ng_types type, u32 offset) { @@ -143,27 +103,10 @@ static int nft_ng_inc_dump(struct sk_buff *skb, const struct nft_expr *expr) priv->offset); } -static int nft_ng_inc_map_dump(struct sk_buff *skb, - const struct nft_expr *expr) -{ - const struct nft_ng_inc *priv = nft_expr_priv(expr); - - if (nft_ng_dump(skb, priv->dreg, priv->modulus, - NFT_NG_INCREMENTAL, priv->offset) || - nla_put_string(skb, NFTA_NG_SET_NAME, priv->map->name)) - goto nla_put_failure; - - return 0; - -nla_put_failure: - return -1; -} - struct nft_ng_random { enum nft_registers dreg:8; u32 modulus; u32 offset; - struct nft_set *map; }; static u32 nft_ng_random_gen(struct nft_ng_random *priv) @@ -183,25 +126,6 @@ static void nft_ng_random_eval(const struct nft_expr *expr, regs->data[priv->dreg] = nft_ng_random_gen(priv); } -static void nft_ng_random_map_eval(const struct nft_expr *expr, - struct nft_regs *regs, - const struct nft_pktinfo *pkt) -{ - struct nft_ng_random *priv = nft_expr_priv(expr); - const struct nft_set *map = priv->map; - const struct nft_set_ext *ext; - u32 result; - bool found; - - result = nft_ng_random_gen(priv); - found = map->ops->lookup(nft_net(pkt), map, &result, &ext); - if (!found) - return; - - nft_data_copy(®s->data[priv->dreg], - nft_set_ext_data(ext), map->dlen); -} - static int nft_ng_random_init(const struct nft_ctx *ctx, const struct nft_expr *expr, const struct nlattr * const tb[]) @@ -226,21 +150,6 @@ static int nft_ng_random_init(const struct nft_ctx *ctx, NFT_DATA_VALUE, sizeof(u32)); } -static int nft_ng_random_map_init(const struct nft_ctx *ctx, - const struct nft_expr *expr, - const struct nlattr * const tb[]) -{ - struct nft_ng_random *priv = nft_expr_priv(expr); - u8 genmask = nft_genmask_next(ctx->net); - - nft_ng_random_init(ctx, expr, tb); - priv->map = nft_set_lookup_global(ctx->net, ctx->table, - tb[NFTA_NG_SET_NAME], - tb[NFTA_NG_SET_ID], genmask); - - return PTR_ERR_OR_ZERO(priv->map); -} - static int nft_ng_random_dump(struct sk_buff *skb, const struct nft_expr *expr) { const struct nft_ng_random *priv = nft_expr_priv(expr); @@ -249,22 +158,6 @@ static int nft_ng_random_dump(struct sk_buff *skb, const struct nft_expr *expr) priv->offset); } -static int nft_ng_random_map_dump(struct sk_buff *skb, - const struct nft_expr *expr) -{ - const struct nft_ng_random *priv = nft_expr_priv(expr); - - if (nft_ng_dump(skb, priv->dreg, priv->modulus, - NFT_NG_RANDOM, priv->offset) || - nla_put_string(skb, NFTA_NG_SET_NAME, priv->map->name)) - goto nla_put_failure; - - return 0; - -nla_put_failure: - return -1; -} - static struct nft_expr_type nft_ng_type; static const struct nft_expr_ops nft_ng_inc_ops = { .type = &nft_ng_type, @@ -274,14 +167,6 @@ static const struct nft_expr_ops nft_ng_inc_ops = { .dump = nft_ng_inc_dump, }; -static const struct nft_expr_ops nft_ng_inc_map_ops = { - .type = &nft_ng_type, - .size = NFT_EXPR_SIZE(sizeof(struct nft_ng_inc)), - .eval = nft_ng_inc_map_eval, - .init = nft_ng_inc_map_init, - .dump = nft_ng_inc_map_dump, -}; - static const struct nft_expr_ops nft_ng_random_ops = { .type = &nft_ng_type, .size = NFT_EXPR_SIZE(sizeof(struct nft_ng_random)), @@ -290,14 +175,6 @@ static const struct nft_expr_ops nft_ng_random_ops = { .dump = nft_ng_random_dump, }; -static const struct nft_expr_ops nft_ng_random_map_ops = { - .type = &nft_ng_type, - .size = NFT_EXPR_SIZE(sizeof(struct nft_ng_random)), - .eval = nft_ng_random_map_eval, - .init = nft_ng_random_map_init, - .dump = nft_ng_random_map_dump, -}; - static const struct nft_expr_ops * nft_ng_select_ops(const struct nft_ctx *ctx, const struct nlattr * const tb[]) { @@ -312,12 +189,8 @@ nft_ng_select_ops(const struct nft_ctx *ctx, const struct nlattr * const tb[]) switch (type) { case NFT_NG_INCREMENTAL: - if (tb[NFTA_NG_SET_NAME]) - return &nft_ng_inc_map_ops; return &nft_ng_inc_ops; case NFT_NG_RANDOM: - if (tb[NFTA_NG_SET_NAME]) - return &nft_ng_random_map_ops; return &nft_ng_random_ops; } From c6e0bba4ac44fd3d116d763c834dd43f3824f635 Mon Sep 17 00:00:00 2001 From: Jiri Kosina <jkosina@suse.cz> Date: Sat, 27 Oct 2018 14:16:13 +0200 Subject: [PATCH 015/443] HID: moving to group maintainership model Benjamin and myself will from now on be sharing maintainership responsibilities for hid.git. Update maintainers to reflect that change, and also move a git repository to shared space at kernel.org. Signed-off-by: Jiri Kosina <jkosina@suse.cz> Signed-off-by: Benjamin Tissoires <benjamin.tissoires@redhat.com> Signed-off-by: Jiri Kosina <jkosina@suse.cz> --- MAINTAINERS | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/MAINTAINERS b/MAINTAINERS index c0c4c50039983..4715cb8d95844 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -6588,9 +6588,9 @@ F: arch/*/include/asm/suspend*.h HID CORE LAYER M: Jiri Kosina <jikos@kernel.org> -R: Benjamin Tissoires <benjamin.tissoires@redhat.com> +M: Benjamin Tissoires <benjamin.tissoires@redhat.com> L: linux-input@vger.kernel.org -T: git git://git.kernel.org/pub/scm/linux/kernel/git/jikos/hid.git +T: git git://git.kernel.org/pub/scm/linux/kernel/git/hid/hid.git S: Maintained F: drivers/hid/ F: include/linux/hid* @@ -15254,9 +15254,9 @@ F: include/linux/usb/gadget* USB HID/HIDBP DRIVERS (USB KEYBOARDS, MICE, REMOTE CONTROLS, ...) M: Jiri Kosina <jikos@kernel.org> -R: Benjamin Tissoires <benjamin.tissoires@redhat.com> +M: Benjamin Tissoires <benjamin.tissoires@redhat.com> L: linux-usb@vger.kernel.org -T: git git://git.kernel.org/pub/scm/linux/kernel/git/jikos/hid.git +T: git git://git.kernel.org/pub/scm/linux/kernel/git/hid/hid.git S: Maintained F: Documentation/hid/hiddev.txt F: drivers/hid/usbhid/ From bc0686ff5fad7a842cc88377439e78be87fedc80 Mon Sep 17 00:00:00 2001 From: Hang Yuan <hang.yuan@linux.intel.com> Date: Wed, 19 Sep 2018 14:42:10 +0800 Subject: [PATCH 016/443] drm/i915/gvt: support inconsecutive partial gtt entry write Previously we assumed two 4-byte writes to the same PTE coming in sequence. But recently we observed inconsecutive partial write happening as well. So this patch enhances the previous solution. It now uses a list to save more partial writes. If one partial write can be combined with another one in the list to construct a full PTE, update its shadow entry. Otherwise, save the partial write in the list. v2: invalidate old entry and flush ggtt (Zhenyu) v3: split old ggtt page unmap to another patch (Zhenyu) v4: refine codes (Zhenyu) Signed-off-by: Hang Yuan <hang.yuan@linux.intel.com> Cc: Yan Zhao <yan.y.zhao@intel.com> Cc: Xiaolin Zhang <xiaolin.zhang@intel.com> Cc: Zhenyu Wang <zhenyu.z.wang@intel.com> Reviewed-by: Xiaolin Zhang <xiaolin.zhang@intel.com> Signed-off-by: Zhenyu Wang <zhenyuw@linux.intel.com> --- drivers/gpu/drm/i915/gvt/gtt.c | 107 ++++++++++++++++----------------- drivers/gpu/drm/i915/gvt/gtt.h | 9 ++- 2 files changed, 60 insertions(+), 56 deletions(-) diff --git a/drivers/gpu/drm/i915/gvt/gtt.c b/drivers/gpu/drm/i915/gvt/gtt.c index c11e353ca9043..919de5a1bafb7 100644 --- a/drivers/gpu/drm/i915/gvt/gtt.c +++ b/drivers/gpu/drm/i915/gvt/gtt.c @@ -1901,7 +1901,6 @@ static struct intel_vgpu_mm *intel_vgpu_create_ggtt_mm(struct intel_vgpu *vgpu) vgpu_free_mm(mm); return ERR_PTR(-ENOMEM); } - mm->ggtt_mm.last_partial_off = -1UL; return mm; } @@ -1926,7 +1925,6 @@ void _intel_vgpu_mm_release(struct kref *mm_ref) invalidate_ppgtt_mm(mm); } else { vfree(mm->ggtt_mm.virtual_ggtt); - mm->ggtt_mm.last_partial_off = -1UL; } vgpu_free_mm(mm); @@ -2164,6 +2162,8 @@ static int emulate_ggtt_mmio_write(struct intel_vgpu *vgpu, unsigned int off, struct intel_gvt_gtt_entry e, m; dma_addr_t dma_addr; int ret; + struct intel_gvt_partial_pte *partial_pte, *pos, *n; + bool partial_update = false; if (bytes != 4 && bytes != 8) return -EINVAL; @@ -2174,68 +2174,57 @@ static int emulate_ggtt_mmio_write(struct intel_vgpu *vgpu, unsigned int off, if (!vgpu_gmadr_is_valid(vgpu, gma)) return 0; - ggtt_get_guest_entry(ggtt_mm, &e, g_gtt_index); - + e.type = GTT_TYPE_GGTT_PTE; memcpy((void *)&e.val64 + (off & (info->gtt_entry_size - 1)), p_data, bytes); /* If ggtt entry size is 8 bytes, and it's split into two 4 bytes - * write, we assume the two 4 bytes writes are consecutive. - * Otherwise, we abort and report error + * write, save the first 4 bytes in a list and update virtual + * PTE. Only update shadow PTE when the second 4 bytes comes. */ if (bytes < info->gtt_entry_size) { - if (ggtt_mm->ggtt_mm.last_partial_off == -1UL) { - /* the first partial part*/ - ggtt_mm->ggtt_mm.last_partial_off = off; - ggtt_mm->ggtt_mm.last_partial_data = e.val64; - return 0; - } else if ((g_gtt_index == - (ggtt_mm->ggtt_mm.last_partial_off >> - info->gtt_entry_size_shift)) && - (off != ggtt_mm->ggtt_mm.last_partial_off)) { - /* the second partial part */ - - int last_off = ggtt_mm->ggtt_mm.last_partial_off & - (info->gtt_entry_size - 1); - - memcpy((void *)&e.val64 + last_off, - (void *)&ggtt_mm->ggtt_mm.last_partial_data + - last_off, bytes); - - ggtt_mm->ggtt_mm.last_partial_off = -1UL; - } else { - int last_offset; - - gvt_vgpu_err("failed to populate guest ggtt entry: abnormal ggtt entry write sequence, last_partial_off=%lx, offset=%x, bytes=%d, ggtt entry size=%d\n", - ggtt_mm->ggtt_mm.last_partial_off, off, - bytes, info->gtt_entry_size); - - /* set host ggtt entry to scratch page and clear - * virtual ggtt entry as not present for last - * partially write offset - */ - last_offset = ggtt_mm->ggtt_mm.last_partial_off & - (~(info->gtt_entry_size - 1)); - - ggtt_get_host_entry(ggtt_mm, &m, last_offset); - ggtt_invalidate_pte(vgpu, &m); - ops->set_pfn(&m, gvt->gtt.scratch_mfn); - ops->clear_present(&m); - ggtt_set_host_entry(ggtt_mm, &m, last_offset); - ggtt_invalidate(gvt->dev_priv); - - ggtt_get_guest_entry(ggtt_mm, &e, last_offset); - ops->clear_present(&e); - ggtt_set_guest_entry(ggtt_mm, &e, last_offset); - - ggtt_mm->ggtt_mm.last_partial_off = off; - ggtt_mm->ggtt_mm.last_partial_data = e.val64; + bool found = false; + + list_for_each_entry_safe(pos, n, + &ggtt_mm->ggtt_mm.partial_pte_list, list) { + if (g_gtt_index == pos->offset >> + info->gtt_entry_size_shift) { + if (off != pos->offset) { + /* the second partial part*/ + int last_off = pos->offset & + (info->gtt_entry_size - 1); + + memcpy((void *)&e.val64 + last_off, + (void *)&pos->data + last_off, + bytes); + + list_del(&pos->list); + kfree(pos); + found = true; + break; + } + + /* update of the first partial part */ + pos->data = e.val64; + ggtt_set_guest_entry(ggtt_mm, &e, g_gtt_index); + return 0; + } + } - return 0; + if (!found) { + /* the first partial part */ + partial_pte = kzalloc(sizeof(*partial_pte), GFP_KERNEL); + if (!partial_pte) + return -ENOMEM; + partial_pte->offset = off; + partial_pte->data = e.val64; + list_add_tail(&partial_pte->list, + &ggtt_mm->ggtt_mm.partial_pte_list); + partial_update = true; } } - if (ops->test_present(&e)) { + if (!partial_update && (ops->test_present(&e))) { gfn = ops->get_pfn(&e); m = e; @@ -2428,6 +2417,8 @@ int intel_vgpu_init_gtt(struct intel_vgpu *vgpu) intel_vgpu_reset_ggtt(vgpu, false); + INIT_LIST_HEAD(>t->ggtt_mm->ggtt_mm.partial_pte_list); + return create_scratch_page_tree(vgpu); } @@ -2452,6 +2443,14 @@ static void intel_vgpu_destroy_all_ppgtt_mm(struct intel_vgpu *vgpu) static void intel_vgpu_destroy_ggtt_mm(struct intel_vgpu *vgpu) { + struct intel_gvt_partial_pte *pos; + + list_for_each_entry(pos, + &vgpu->gtt.ggtt_mm->ggtt_mm.partial_pte_list, list) { + gvt_dbg_mm("partial PTE update on hold 0x%lx : 0x%llx\n", + pos->offset, pos->data); + kfree(pos); + } intel_vgpu_destroy_mm(vgpu->gtt.ggtt_mm); vgpu->gtt.ggtt_mm = NULL; } diff --git a/drivers/gpu/drm/i915/gvt/gtt.h b/drivers/gpu/drm/i915/gvt/gtt.h index 7a9b36176efb7..a11bfee1e0c85 100644 --- a/drivers/gpu/drm/i915/gvt/gtt.h +++ b/drivers/gpu/drm/i915/gvt/gtt.h @@ -133,6 +133,12 @@ enum intel_gvt_mm_type { #define GVT_RING_CTX_NR_PDPS GEN8_3LVL_PDPES +struct intel_gvt_partial_pte { + unsigned long offset; + u64 data; + struct list_head list; +}; + struct intel_vgpu_mm { enum intel_gvt_mm_type type; struct intel_vgpu *vgpu; @@ -157,8 +163,7 @@ struct intel_vgpu_mm { } ppgtt_mm; struct { void *virtual_ggtt; - unsigned long last_partial_off; - u64 last_partial_data; + struct list_head partial_pte_list; } ggtt_mm; }; }; From 606a745944bc0ebd14f77dfc61ac7d6cb685cefe Mon Sep 17 00:00:00 2001 From: Xinyun Liu <xinyun.liu@intel.com> Date: Wed, 19 Sep 2018 15:28:53 +0800 Subject: [PATCH 017/443] drm/i915/gvt: correct mask setting for CSFE_CHICKEN1 CSFE_CHICKEN1(0x20d4) needs access with mask. This is caught in AcrnGT conformance check test: [drm:intel_gvt_vgpu_conformance_check] *ERROR* gvt: vgpu1 unconformance mmio 0x20d4:0x40004,0x4 Signed-off-by: Xinyun Liu <xinyun.liu@intel.com> Signed-off-by: Zhi Wang <zhi.a.wang@intel.com> Signed-off-by: Zhenyu Wang <zhenyuw@linux.intel.com> --- drivers/gpu/drm/i915/gvt/mmio_context.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/gvt/mmio_context.c b/drivers/gpu/drm/i915/gvt/mmio_context.c index e872f4847fbe0..088a62ab2bc89 100644 --- a/drivers/gpu/drm/i915/gvt/mmio_context.c +++ b/drivers/gpu/drm/i915/gvt/mmio_context.c @@ -144,7 +144,7 @@ static struct engine_mmio gen9_engine_mmio_list[] __cacheline_aligned = { {RCS, GAMT_CHKN_BIT_REG, 0x0, false}, /* 0x4ab8 */ {RCS, GEN9_GAMT_ECO_REG_RW_IA, 0x0, false}, /* 0x4ab0 */ - {RCS, GEN9_CSFE_CHICKEN1_RCS, 0x0, false}, /* 0x20d4 */ + {RCS, GEN9_CSFE_CHICKEN1_RCS, 0xffff, false}, /* 0x20d4 */ {RCS, GEN8_GARBCNTL, 0x0, false}, /* 0xb004 */ {RCS, GEN7_FF_THREAD_MODE, 0x0, false}, /* 0x20a0 */ From 5e7154ff5e8e21dc9acac4f8dba7533552365374 Mon Sep 17 00:00:00 2001 From: Longhe Zheng <longhe.zheng@intel.com> Date: Tue, 30 Oct 2018 16:12:10 +0800 Subject: [PATCH 018/443] drm/i915/gvt: Handle values of EDP_PSR_IMR and EDP_PSR_IIR GVT-g only simulates DP port for guest and leaves EDP_PSR_IMR and EDP_PSR_IIR registers as default MMIO read/write. So guest won't get expected initial values of these registers when initializing the gpu driver, which results in following warning and logs. -------- Interrupt register 0x64838 is not zero: 0xffffffff WARNING: CPU: 1 PID: 157 at drivers/gpu/drm/i915/i915_irq.c:177 gen3_assert_iir_is_zero+0x38/0xa0 Call Trace: gen8_de_irq_postinstall+0xa7/0x400 gen8_irq_postinstall+0x27/0x80 drm_irq_install+0xbc/0x140 i915_driver_load+0xa9d/0xd50 -------- Because GVT-g does not handle EDP(embedded DP) simulation for guests, always set EDP_PSR_IMR and EDP_PSR_IIR to value 0. Signed-off-by: Longhe Zheng <longhe.zheng@intel.com> Signed-off-by: Zhenyu Wang <zhenyuw@linux.intel.com> --- drivers/gpu/drm/i915/gvt/handlers.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/i915/gvt/handlers.c b/drivers/gpu/drm/i915/gvt/handlers.c index 94c1089ecf59e..f9002cb1f2a3b 100644 --- a/drivers/gpu/drm/i915/gvt/handlers.c +++ b/drivers/gpu/drm/i915/gvt/handlers.c @@ -1608,7 +1608,7 @@ static int bxt_gt_disp_pwron_write(struct intel_vgpu *vgpu, return 0; } -static int bxt_edp_psr_imr_iir_write(struct intel_vgpu *vgpu, +static int edp_psr_imr_iir_write(struct intel_vgpu *vgpu, unsigned int offset, void *p_data, unsigned int bytes) { vgpu_vreg(vgpu, offset) = 0; @@ -2613,6 +2613,9 @@ static int init_generic_mmio_info(struct intel_gvt *gvt) MMIO_DFH(_MMIO(0x1a178), D_BDW_PLUS, F_CMD_ACCESS, NULL, NULL); MMIO_DFH(_MMIO(0x1a17c), D_BDW_PLUS, F_CMD_ACCESS, NULL, NULL); MMIO_DFH(_MMIO(0x2217c), D_BDW_PLUS, F_CMD_ACCESS, NULL, NULL); + + MMIO_DH(EDP_PSR_IMR, D_BDW_PLUS, NULL, edp_psr_imr_iir_write); + MMIO_DH(EDP_PSR_IIR, D_BDW_PLUS, NULL, edp_psr_imr_iir_write); return 0; } @@ -3216,9 +3219,6 @@ static int init_bxt_mmio_info(struct intel_gvt *gvt) MMIO_D(HSW_TVIDEO_DIP_GCP(TRANSCODER_B), D_BXT); MMIO_D(HSW_TVIDEO_DIP_GCP(TRANSCODER_C), D_BXT); - MMIO_DH(EDP_PSR_IMR, D_BXT, NULL, bxt_edp_psr_imr_iir_write); - MMIO_DH(EDP_PSR_IIR, D_BXT, NULL, bxt_edp_psr_imr_iir_write); - MMIO_D(RC6_CTX_BASE, D_BXT); MMIO_D(GEN8_PUSHBUS_CONTROL, D_BXT); From 399474e4c1100bca264ed14fa3ad0d68fab484d8 Mon Sep 17 00:00:00 2001 From: Julian Sax <jsbc@gmx.de> Date: Wed, 24 Oct 2018 22:40:26 +0200 Subject: [PATCH 019/443] HID: i2c-hid: add Direkt-Tek DTLAPY133-1 to descriptor override This device uses the SIPODEV SP1064 touchpad, which does not supply descriptors, so it has to be added to the override list. Reported-by: Tim Aldridge <taldridge@mac.com> Signed-off-by: Julian Sax <jsbc@gmx.de> Signed-off-by: Jiri Kosina <jkosina@suse.cz> --- drivers/hid/i2c-hid/i2c-hid-dmi-quirks.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/drivers/hid/i2c-hid/i2c-hid-dmi-quirks.c b/drivers/hid/i2c-hid/i2c-hid-dmi-quirks.c index 1d645c9ab417b..9ca2fcc48b5a5 100644 --- a/drivers/hid/i2c-hid/i2c-hid-dmi-quirks.c +++ b/drivers/hid/i2c-hid/i2c-hid-dmi-quirks.c @@ -330,6 +330,14 @@ static const struct dmi_system_id i2c_hid_dmi_desc_override_table[] = { }, .driver_data = (void *)&sipodev_desc }, + { + .ident = "Direkt-Tek DTLAPY133-1", + .matches = { + DMI_EXACT_MATCH(DMI_SYS_VENDOR, "Direkt-Tek"), + DMI_EXACT_MATCH(DMI_PRODUCT_NAME, "DTLAPY133-1"), + }, + .driver_data = (void *)&sipodev_desc + }, { .ident = "Mediacom Flexbook Edge 11", .matches = { From 439cd39ea136d2c026805264d58a91f36b6b64ca Mon Sep 17 00:00:00 2001 From: Stefano Brivio <sbrivio@redhat.com> Date: Sat, 14 Jul 2018 21:59:43 +0200 Subject: [PATCH 020/443] netfilter: ipset: list:set: Decrease refcount synchronously on deletion and replace Commit 45040978c899 ("netfilter: ipset: Fix set:list type crash when flush/dump set in parallel") postponed decreasing set reference counters to the RCU callback. An 'ipset del' command can terminate before the RCU grace period is elapsed, and if sets are listed before then, the reference counter shown in userspace will be wrong: # ipset create h hash:ip; ipset create l list:set; ipset add l # ipset del l h; ipset list h Name: h Type: hash:ip Revision: 4 Header: family inet hashsize 1024 maxelem 65536 Size in memory: 88 References: 1 Number of entries: 0 Members: # sleep 1; ipset list h Name: h Type: hash:ip Revision: 4 Header: family inet hashsize 1024 maxelem 65536 Size in memory: 88 References: 0 Number of entries: 0 Members: Fix this by making the reference count update synchronous again. As a result, when sets are listed, ip_set_name_byindex() might now fetch a set whose reference count is already zero. Instead of relying on the reference count to protect against concurrent set renaming, grab ip_set_ref_lock as reader and copy the name, while holding the same lock in ip_set_rename() as writer instead. Reported-by: Li Shuang <shuali@redhat.com> Fixes: 45040978c899 ("netfilter: ipset: Fix set:list type crash when flush/dump set in parallel") Signed-off-by: Stefano Brivio <sbrivio@redhat.com> Signed-off-by: Jozsef Kadlecsik <kadlec@blackhole.kfki.hu> Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org> --- include/linux/netfilter/ipset/ip_set.h | 2 +- net/netfilter/ipset/ip_set_core.c | 23 +++++++++++------------ net/netfilter/ipset/ip_set_list_set.c | 17 +++++++++++------ 3 files changed, 23 insertions(+), 19 deletions(-) diff --git a/include/linux/netfilter/ipset/ip_set.h b/include/linux/netfilter/ipset/ip_set.h index 34fc80f3eb900..1d100efe74ec7 100644 --- a/include/linux/netfilter/ipset/ip_set.h +++ b/include/linux/netfilter/ipset/ip_set.h @@ -314,7 +314,7 @@ enum { extern ip_set_id_t ip_set_get_byname(struct net *net, const char *name, struct ip_set **set); extern void ip_set_put_byindex(struct net *net, ip_set_id_t index); -extern const char *ip_set_name_byindex(struct net *net, ip_set_id_t index); +extern void ip_set_name_byindex(struct net *net, ip_set_id_t index, char *name); extern ip_set_id_t ip_set_nfnl_get_byindex(struct net *net, ip_set_id_t index); extern void ip_set_nfnl_put(struct net *net, ip_set_id_t index); diff --git a/net/netfilter/ipset/ip_set_core.c b/net/netfilter/ipset/ip_set_core.c index bc4bd247bb7d4..fa15a831aeee5 100644 --- a/net/netfilter/ipset/ip_set_core.c +++ b/net/netfilter/ipset/ip_set_core.c @@ -693,21 +693,20 @@ ip_set_put_byindex(struct net *net, ip_set_id_t index) EXPORT_SYMBOL_GPL(ip_set_put_byindex); /* Get the name of a set behind a set index. - * We assume the set is referenced, so it does exist and - * can't be destroyed. The set cannot be renamed due to - * the referencing either. - * + * Set itself is protected by RCU, but its name isn't: to protect against + * renaming, grab ip_set_ref_lock as reader (see ip_set_rename()) and copy the + * name. */ -const char * -ip_set_name_byindex(struct net *net, ip_set_id_t index) +void +ip_set_name_byindex(struct net *net, ip_set_id_t index, char *name) { - const struct ip_set *set = ip_set_rcu_get(net, index); + struct ip_set *set = ip_set_rcu_get(net, index); BUG_ON(!set); - BUG_ON(set->ref == 0); - /* Referenced, so it's safe */ - return set->name; + read_lock_bh(&ip_set_ref_lock); + strncpy(name, set->name, IPSET_MAXNAMELEN); + read_unlock_bh(&ip_set_ref_lock); } EXPORT_SYMBOL_GPL(ip_set_name_byindex); @@ -1153,7 +1152,7 @@ static int ip_set_rename(struct net *net, struct sock *ctnl, if (!set) return -ENOENT; - read_lock_bh(&ip_set_ref_lock); + write_lock_bh(&ip_set_ref_lock); if (set->ref != 0) { ret = -IPSET_ERR_REFERENCED; goto out; @@ -1170,7 +1169,7 @@ static int ip_set_rename(struct net *net, struct sock *ctnl, strncpy(set->name, name2, IPSET_MAXNAMELEN); out: - read_unlock_bh(&ip_set_ref_lock); + write_unlock_bh(&ip_set_ref_lock); return ret; } diff --git a/net/netfilter/ipset/ip_set_list_set.c b/net/netfilter/ipset/ip_set_list_set.c index 072a658fde047..4eef55da0878e 100644 --- a/net/netfilter/ipset/ip_set_list_set.c +++ b/net/netfilter/ipset/ip_set_list_set.c @@ -148,9 +148,7 @@ __list_set_del_rcu(struct rcu_head * rcu) { struct set_elem *e = container_of(rcu, struct set_elem, rcu); struct ip_set *set = e->set; - struct list_set *map = set->data; - ip_set_put_byindex(map->net, e->id); ip_set_ext_destroy(set, e); kfree(e); } @@ -158,15 +156,21 @@ __list_set_del_rcu(struct rcu_head * rcu) static inline void list_set_del(struct ip_set *set, struct set_elem *e) { + struct list_set *map = set->data; + set->elements--; list_del_rcu(&e->list); + ip_set_put_byindex(map->net, e->id); call_rcu(&e->rcu, __list_set_del_rcu); } static inline void -list_set_replace(struct set_elem *e, struct set_elem *old) +list_set_replace(struct ip_set *set, struct set_elem *e, struct set_elem *old) { + struct list_set *map = set->data; + list_replace_rcu(&old->list, &e->list); + ip_set_put_byindex(map->net, old->id); call_rcu(&old->rcu, __list_set_del_rcu); } @@ -298,7 +302,7 @@ list_set_uadd(struct ip_set *set, void *value, const struct ip_set_ext *ext, INIT_LIST_HEAD(&e->list); list_set_init_extensions(set, ext, e); if (n) - list_set_replace(e, n); + list_set_replace(set, e, n); else if (next) list_add_tail_rcu(&e->list, &next->list); else if (prev) @@ -486,6 +490,7 @@ list_set_list(const struct ip_set *set, const struct list_set *map = set->data; struct nlattr *atd, *nested; u32 i = 0, first = cb->args[IPSET_CB_ARG0]; + char name[IPSET_MAXNAMELEN]; struct set_elem *e; int ret = 0; @@ -504,8 +509,8 @@ list_set_list(const struct ip_set *set, nested = ipset_nest_start(skb, IPSET_ATTR_DATA); if (!nested) goto nla_put_failure; - if (nla_put_string(skb, IPSET_ATTR_NAME, - ip_set_name_byindex(map->net, e->id))) + ip_set_name_byindex(map->net, e->id, name); + if (nla_put_string(skb, IPSET_ATTR_NAME, name)) goto nla_put_failure; if (ip_set_put_extensions(skb, set, e, true)) goto nla_put_failure; From 886503f34d63e681662057448819edb5b1057a97 Mon Sep 17 00:00:00 2001 From: Eric Westbrook <eric@westbrook.io> Date: Tue, 28 Aug 2018 15:14:42 -0600 Subject: [PATCH 021/443] netfilter: ipset: actually allow allowable CIDR 0 in hash:net,port,net Allow /0 as advertised for hash:net,port,net sets. For "hash:net,port,net", ipset(8) says that "either subnet is permitted to be a /0 should you wish to match port between all destinations." Make that statement true. Before: # ipset create cidrzero hash:net,port,net # ipset add cidrzero 0.0.0.0/0,12345,0.0.0.0/0 ipset v6.34: The value of the CIDR parameter of the IP address is invalid # ipset create cidrzero6 hash:net,port,net family inet6 # ipset add cidrzero6 ::/0,12345,::/0 ipset v6.34: The value of the CIDR parameter of the IP address is invalid After: # ipset create cidrzero hash:net,port,net # ipset add cidrzero 0.0.0.0/0,12345,0.0.0.0/0 # ipset test cidrzero 192.168.205.129,12345,172.16.205.129 192.168.205.129,tcp:12345,172.16.205.129 is in set cidrzero. # ipset create cidrzero6 hash:net,port,net family inet6 # ipset add cidrzero6 ::/0,12345,::/0 # ipset test cidrzero6 fe80::1,12345,ff00::1 fe80::1,tcp:12345,ff00::1 is in set cidrzero6. See also: https://bugzilla.kernel.org/show_bug.cgi?id=200897 https://github.com/ewestbrook/linux/commit/df7ff6efb0934ab6acc11f003ff1a7580d6c1d9c Signed-off-by: Eric Westbrook <linux@westbrook.io> Signed-off-by: Jozsef Kadlecsik <kadlec@blackhole.kfki.hu> Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org> --- net/netfilter/ipset/ip_set_hash_netportnet.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/net/netfilter/ipset/ip_set_hash_netportnet.c b/net/netfilter/ipset/ip_set_hash_netportnet.c index d391485a6acdc..613e18e720a44 100644 --- a/net/netfilter/ipset/ip_set_hash_netportnet.c +++ b/net/netfilter/ipset/ip_set_hash_netportnet.c @@ -213,13 +213,13 @@ hash_netportnet4_uadt(struct ip_set *set, struct nlattr *tb[], if (tb[IPSET_ATTR_CIDR]) { e.cidr[0] = nla_get_u8(tb[IPSET_ATTR_CIDR]); - if (!e.cidr[0] || e.cidr[0] > HOST_MASK) + if (e.cidr[0] > HOST_MASK) return -IPSET_ERR_INVALID_CIDR; } if (tb[IPSET_ATTR_CIDR2]) { e.cidr[1] = nla_get_u8(tb[IPSET_ATTR_CIDR2]); - if (!e.cidr[1] || e.cidr[1] > HOST_MASK) + if (e.cidr[1] > HOST_MASK) return -IPSET_ERR_INVALID_CIDR; } @@ -493,13 +493,13 @@ hash_netportnet6_uadt(struct ip_set *set, struct nlattr *tb[], if (tb[IPSET_ATTR_CIDR]) { e.cidr[0] = nla_get_u8(tb[IPSET_ATTR_CIDR]); - if (!e.cidr[0] || e.cidr[0] > HOST_MASK) + if (e.cidr[0] > HOST_MASK) return -IPSET_ERR_INVALID_CIDR; } if (tb[IPSET_ATTR_CIDR2]) { e.cidr[1] = nla_get_u8(tb[IPSET_ATTR_CIDR2]); - if (!e.cidr[1] || e.cidr[1] > HOST_MASK) + if (e.cidr[1] > HOST_MASK) return -IPSET_ERR_INVALID_CIDR; } From ed956f3947a01ff9875cd908d7c1ef1fe7f47bf0 Mon Sep 17 00:00:00 2001 From: Andrey Ryabinin <aryabinin@virtuozzo.com> Date: Mon, 22 Oct 2018 23:30:40 +0200 Subject: [PATCH 022/443] netfilter: ipset: fix ip_set_list allocation failure ip_set_create() and ip_set_net_init() attempt to allocate physically contiguous memory for ip_set_list. If memory is fragmented, the allocations could easily fail: vzctl: page allocation failure: order:7, mode:0xc0d0 Call Trace: dump_stack+0x19/0x1b warn_alloc_failed+0x110/0x180 __alloc_pages_nodemask+0x7bf/0xc60 alloc_pages_current+0x98/0x110 kmalloc_order+0x18/0x40 kmalloc_order_trace+0x26/0xa0 __kmalloc+0x279/0x290 ip_set_net_init+0x4b/0x90 [ip_set] ops_init+0x3b/0xb0 setup_net+0xbb/0x170 copy_net_ns+0xf1/0x1c0 create_new_namespaces+0xf9/0x180 copy_namespaces+0x8e/0xd0 copy_process+0xb61/0x1a00 do_fork+0x91/0x320 Use kvcalloc() to fallback to 0-order allocations if high order page isn't available. Signed-off-by: Andrey Ryabinin <aryabinin@virtuozzo.com> Signed-off-by: Jozsef Kadlecsik <kadlec@blackhole.kfki.hu> Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org> --- net/netfilter/ipset/ip_set_core.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/net/netfilter/ipset/ip_set_core.c b/net/netfilter/ipset/ip_set_core.c index fa15a831aeee5..68db946df1511 100644 --- a/net/netfilter/ipset/ip_set_core.c +++ b/net/netfilter/ipset/ip_set_core.c @@ -960,7 +960,7 @@ static int ip_set_create(struct net *net, struct sock *ctnl, /* Wraparound */ goto cleanup; - list = kcalloc(i, sizeof(struct ip_set *), GFP_KERNEL); + list = kvcalloc(i, sizeof(struct ip_set *), GFP_KERNEL); if (!list) goto cleanup; /* nfnl mutex is held, both lists are valid */ @@ -972,7 +972,7 @@ static int ip_set_create(struct net *net, struct sock *ctnl, /* Use new list */ index = inst->ip_set_max; inst->ip_set_max = i; - kfree(tmp); + kvfree(tmp); ret = 0; } else if (ret) { goto cleanup; @@ -2058,7 +2058,7 @@ ip_set_net_init(struct net *net) if (inst->ip_set_max >= IPSET_INVALID_ID) inst->ip_set_max = IPSET_INVALID_ID - 1; - list = kcalloc(inst->ip_set_max, sizeof(struct ip_set *), GFP_KERNEL); + list = kvcalloc(inst->ip_set_max, sizeof(struct ip_set *), GFP_KERNEL); if (!list) return -ENOMEM; inst->is_deleted = false; @@ -2086,7 +2086,7 @@ ip_set_net_exit(struct net *net) } } nfnl_unlock(NFNL_SUBSYS_IPSET); - kfree(rcu_dereference_protected(inst->ip_set_list, 1)); + kvfree(rcu_dereference_protected(inst->ip_set_list, 1)); } static struct pernet_operations ip_set_net_ops = { From 51f5fd2e4615dcdc25cd7f9d19b7b27eb9ecdac7 Mon Sep 17 00:00:00 2001 From: Will Deacon <will.deacon@arm.com> Date: Wed, 31 Oct 2018 17:44:08 +0000 Subject: [PATCH 023/443] tools headers barrier: Fix arm64 tools build failure wrt smp_load_{acquire,release} Cheers for reporting this. I managed to reproduce the build failure with gcc version 6.3.0 20170516 (Debian 6.3.0-18+deb9u1). The code in question is the arm64 versions of smp_load_acquire() and smp_store_release(). Unlike other architectures, these are not built around READ_ONCE() and WRITE_ONCE() since we have instructions we can use instead of fences. Bringing our macros up-to-date with those (i.e. tweaking the union initialisation and using the special "uXX_alias_t" types) appears to fix the issue for me. Committer notes: Testing it in the systems previously failing: # time dm android-ndk:r12b-arm \ android-ndk:r15c-arm \ debian:experimental-x-arm64 \ ubuntu:14.04.4-x-linaro-arm64 \ ubuntu:16.04-x-arm \ ubuntu:16.04-x-arm64 \ ubuntu:18.04-x-arm \ ubuntu:18.04-x-arm64 1 android-ndk:r12b-arm : Ok arm-linux-androideabi-gcc (GCC) 4.9.x 20150123 (prerelease) 2 android-ndk:r15c-arm : Ok arm-linux-androideabi-gcc (GCC) 4.9.x 20150123 (prerelease) 3 debian:experimental-x-arm64 : Ok aarch64-linux-gnu-gcc (Debian 8.2.0-7) 8.2.0 4 ubuntu:14.04.4-x-linaro-arm64 : Ok aarch64-linux-gnu-gcc (Linaro GCC 5.5-2017.10) 5.5.0 5 ubuntu:16.04-x-arm : Ok arm-linux-gnueabihf-gcc (Ubuntu/Linaro 5.4.0-6ubuntu1~16.04.9) 5.4.0 20160609 6 ubuntu:16.04-x-arm64 : Ok aarch64-linux-gnu-gcc (Ubuntu/Linaro 5.4.0-6ubuntu1~16.04.9) 5.4.0 20160609 7 ubuntu:18.04-x-arm : Ok arm-linux-gnueabihf-gcc (Ubuntu/Linaro 7.3.0-27ubuntu1~18.04) 7.3.0 8 ubuntu:18.04-x-arm64 : Ok aarch64-linux-gnu-gcc (Ubuntu/Linaro 7.3.0-27ubuntu1~18.04) 7.3.0 Reported-by: Arnaldo Carvalho de Melo <acme@redhat.com> Signed-off-by: Will Deacon <will.deacon@arm.com> Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com> Tested-by: Daniel Borkmann <daniel@iogearbox.net> Cc: Peter Zijlstra <peterz@infradead.org> Link: http://lkml.kernel.org/r/20181031174408.GA27871@arm.com Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com> --- tools/arch/arm64/include/asm/barrier.h | 133 +++++++++++++------------ 1 file changed, 67 insertions(+), 66 deletions(-) diff --git a/tools/arch/arm64/include/asm/barrier.h b/tools/arch/arm64/include/asm/barrier.h index 12835ea0e4173..378c051fa1776 100644 --- a/tools/arch/arm64/include/asm/barrier.h +++ b/tools/arch/arm64/include/asm/barrier.h @@ -14,74 +14,75 @@ #define wmb() asm volatile("dmb ishst" ::: "memory") #define rmb() asm volatile("dmb ishld" ::: "memory") -#define smp_store_release(p, v) \ -do { \ - union { typeof(*p) __val; char __c[1]; } __u = \ - { .__val = (__force typeof(*p)) (v) }; \ - \ - switch (sizeof(*p)) { \ - case 1: \ - asm volatile ("stlrb %w1, %0" \ - : "=Q" (*p) \ - : "r" (*(__u8 *)__u.__c) \ - : "memory"); \ - break; \ - case 2: \ - asm volatile ("stlrh %w1, %0" \ - : "=Q" (*p) \ - : "r" (*(__u16 *)__u.__c) \ - : "memory"); \ - break; \ - case 4: \ - asm volatile ("stlr %w1, %0" \ - : "=Q" (*p) \ - : "r" (*(__u32 *)__u.__c) \ - : "memory"); \ - break; \ - case 8: \ - asm volatile ("stlr %1, %0" \ - : "=Q" (*p) \ - : "r" (*(__u64 *)__u.__c) \ - : "memory"); \ - break; \ - default: \ - /* Only to shut up gcc ... */ \ - mb(); \ - break; \ - } \ +#define smp_store_release(p, v) \ +do { \ + union { typeof(*p) __val; char __c[1]; } __u = \ + { .__val = (v) }; \ + \ + switch (sizeof(*p)) { \ + case 1: \ + asm volatile ("stlrb %w1, %0" \ + : "=Q" (*p) \ + : "r" (*(__u8_alias_t *)__u.__c) \ + : "memory"); \ + break; \ + case 2: \ + asm volatile ("stlrh %w1, %0" \ + : "=Q" (*p) \ + : "r" (*(__u16_alias_t *)__u.__c) \ + : "memory"); \ + break; \ + case 4: \ + asm volatile ("stlr %w1, %0" \ + : "=Q" (*p) \ + : "r" (*(__u32_alias_t *)__u.__c) \ + : "memory"); \ + break; \ + case 8: \ + asm volatile ("stlr %1, %0" \ + : "=Q" (*p) \ + : "r" (*(__u64_alias_t *)__u.__c) \ + : "memory"); \ + break; \ + default: \ + /* Only to shut up gcc ... */ \ + mb(); \ + break; \ + } \ } while (0) -#define smp_load_acquire(p) \ -({ \ - union { typeof(*p) __val; char __c[1]; } __u; \ - \ - switch (sizeof(*p)) { \ - case 1: \ - asm volatile ("ldarb %w0, %1" \ - : "=r" (*(__u8 *)__u.__c) \ - : "Q" (*p) : "memory"); \ - break; \ - case 2: \ - asm volatile ("ldarh %w0, %1" \ - : "=r" (*(__u16 *)__u.__c) \ - : "Q" (*p) : "memory"); \ - break; \ - case 4: \ - asm volatile ("ldar %w0, %1" \ - : "=r" (*(__u32 *)__u.__c) \ - : "Q" (*p) : "memory"); \ - break; \ - case 8: \ - asm volatile ("ldar %0, %1" \ - : "=r" (*(__u64 *)__u.__c) \ - : "Q" (*p) : "memory"); \ - break; \ - default: \ - /* Only to shut up gcc ... */ \ - mb(); \ - break; \ - } \ - __u.__val; \ +#define smp_load_acquire(p) \ +({ \ + union { typeof(*p) __val; char __c[1]; } __u = \ + { .__c = { 0 } }; \ + \ + switch (sizeof(*p)) { \ + case 1: \ + asm volatile ("ldarb %w0, %1" \ + : "=r" (*(__u8_alias_t *)__u.__c) \ + : "Q" (*p) : "memory"); \ + break; \ + case 2: \ + asm volatile ("ldarh %w0, %1" \ + : "=r" (*(__u16_alias_t *)__u.__c) \ + : "Q" (*p) : "memory"); \ + break; \ + case 4: \ + asm volatile ("ldar %w0, %1" \ + : "=r" (*(__u32_alias_t *)__u.__c) \ + : "Q" (*p) : "memory"); \ + break; \ + case 8: \ + asm volatile ("ldar %0, %1" \ + : "=r" (*(__u64_alias_t *)__u.__c) \ + : "Q" (*p) : "memory"); \ + break; \ + default: \ + /* Only to shut up gcc ... */ \ + mb(); \ + break; \ + } \ + __u.__val; \ }) #endif /* _TOOLS_LINUX_ASM_AARCH64_BARRIER_H */ From febf8a3712e4209b7e650b37b3b240a2b387794d Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo <acme@redhat.com> Date: Thu, 1 Nov 2018 10:34:34 -0300 Subject: [PATCH 024/443] perf examples bpf: Start augmenting raw_syscalls:sys_{start,exit} The previous approach of attaching to each syscall showed how it is possible to augment tracepoints and use that augmentation, pointer payloads, in the existing beautifiers in 'perf trace', but for a more general solution we now will try to augment the main raw_syscalls:sys_{enter,exit} syscalls, and then pass instructions in maps so that it knows which syscalls and which pointer contents, and how many bytes for each of the arguments should be copied. Start with just the bare minimum to collect what is provided by those two tracepoints via the __augmented_syscalls__ map + bpf-output perf event, which results in perf trace showing them without connecting enter+exit: # perf trace -e tools/perf/examples/bpf/augmented_raw_syscalls.c sleep 1 0.000 sleep/11563 raw_syscalls:sys_exit:NR 59 = 0 0.019 ( ): sleep/11563 brk() ... 0.021 sleep/11563 raw_syscalls:sys_exit:NR 12 = 94682642325504 0.033 ( ): sleep/11563 access(filename:, mode: R) ... 0.037 sleep/11563 raw_syscalls:sys_exit:NR 21 = -2 0.041 ( ): sleep/11563 openat(dfd: CWD, filename: , flags: CLOEXEC) ... 0.044 sleep/11563 raw_syscalls:sys_exit:NR 257 = 3 0.045 ( ): sleep/11563 fstat(fd: 3, statbuf: 0x7ffdbf7119b0) ... 0.046 sleep/11563 raw_syscalls:sys_exit:NR 5 = 0 0.047 ( ): sleep/11563 mmap(len: 103334, prot: READ, flags: PRIVATE, fd: 3) ... 0.049 sleep/11563 raw_syscalls:sys_exit:NR 9 = 140196285493248 0.050 ( ): sleep/11563 close(fd: 3) ... 0.051 sleep/11563 raw_syscalls:sys_exit:NR 3 = 0 0.059 ( ): sleep/11563 openat(dfd: CWD, filename: , flags: CLOEXEC) ... 0.062 sleep/11563 raw_syscalls:sys_exit:NR 257 = 3 0.063 ( ): sleep/11563 read(fd: 3, buf: 0x7ffdbf711b78, count: 832) ... 0.065 sleep/11563 raw_syscalls:sys_exit:NR 0 = 832 0.066 ( ): sleep/11563 fstat(fd: 3, statbuf: 0x7ffdbf711a10) ... 0.067 sleep/11563 raw_syscalls:sys_exit:NR 5 = 0 0.068 ( ): sleep/11563 mmap(len: 8192, prot: READ|WRITE, flags: PRIVATE|ANONYMOUS) ... 0.070 sleep/11563 raw_syscalls:sys_exit:NR 9 = 140196285485056 0.073 ( ): sleep/11563 mmap(len: 3889792, prot: EXEC|READ, flags: PRIVATE|DENYWRITE, fd: 3) ... 0.076 sleep/11563 raw_syscalls:sys_exit:NR 9 = 140196279463936 0.077 ( ): sleep/11563 mprotect(start: 0x7f81fd8a8000, len: 2093056) ... 0.083 sleep/11563 raw_syscalls:sys_exit:NR 10 = 0 0.084 ( ): sleep/11563 mmap(addr: 0x7f81fdaa7000, len: 24576, prot: READ|WRITE, flags: PRIVATE|FIXED|DENYWRITE, fd: 3, off: 1753088) ... 0.088 sleep/11563 raw_syscalls:sys_exit:NR 9 = 140196283314176 0.091 ( ): sleep/11563 mmap(addr: 0x7f81fdaad000, len: 14976, prot: READ|WRITE, flags: PRIVATE|FIXED|ANONYMOUS) ... 0.093 sleep/11563 raw_syscalls:sys_exit:NR 9 = 140196283338752 0.097 ( ): sleep/11563 close(fd: 3) ... 0.098 sleep/11563 raw_syscalls:sys_exit:NR 3 = 0 0.107 ( ): sleep/11563 arch_prctl(option: 4098, arg2: 140196285490432) ... 0.108 sleep/11563 raw_syscalls:sys_exit:NR 158 = 0 0.143 ( ): sleep/11563 mprotect(start: 0x7f81fdaa7000, len: 16384, prot: READ) ... 0.146 sleep/11563 raw_syscalls:sys_exit:NR 10 = 0 0.157 ( ): sleep/11563 mprotect(start: 0x561d037e7000, len: 4096, prot: READ) ... 0.160 sleep/11563 raw_syscalls:sys_exit:NR 10 = 0 0.163 ( ): sleep/11563 mprotect(start: 0x7f81fdcd5000, len: 4096, prot: READ) ... 0.165 sleep/11563 raw_syscalls:sys_exit:NR 10 = 0 0.166 ( ): sleep/11563 munmap(addr: 0x7f81fdcbb000, len: 103334) ... 0.174 sleep/11563 raw_syscalls:sys_exit:NR 11 = 0 0.216 ( ): sleep/11563 brk() ... 0.217 sleep/11563 raw_syscalls:sys_exit:NR 12 = 94682642325504 0.217 ( ): sleep/11563 brk(brk: 0x561d05453000) ... 0.219 sleep/11563 raw_syscalls:sys_exit:NR 12 = 94682642460672 0.220 ( ): sleep/11563 brk() ... 0.221 sleep/11563 raw_syscalls:sys_exit:NR 12 = 94682642460672 0.224 ( ): sleep/11563 open(filename: , flags: CLOEXEC) ... 0.228 sleep/11563 raw_syscalls:sys_exit:NR 2 = 3 0.229 ( ): sleep/11563 fstat(fd: 3, statbuf: 0x7f81fdaacaa0) ... 0.230 sleep/11563 raw_syscalls:sys_exit:NR 5 = 0 0.231 ( ): sleep/11563 mmap(len: 113045344, prot: READ, flags: PRIVATE, fd: 3) ... 0.234 sleep/11563 raw_syscalls:sys_exit:NR 9 = 140196166418432 0.237 ( ): sleep/11563 close(fd: 3) ... 0.238 sleep/11563 raw_syscalls:sys_exit:NR 3 = 0 0.262 ( ): sleep/11563 nanosleep(rqtp: 0x7ffdbf7126f0) ... 1000.399 sleep/11563 raw_syscalls:sys_exit:NR 35 = 0 1000.440 ( ): sleep/11563 close(fd: 1) ... 1000.447 sleep/11563 raw_syscalls:sys_exit:NR 3 = 0 1000.454 ( ): sleep/11563 close(fd: 2) ... 1000.468 ( ): sleep/11563 exit_group( ) # In the next csets we'll connect those events to the existing enter/exit raw_syscalls handlers in 'perf trace', just like we did with the syscalls:sys_{enter,exit}_* tracepoints. Cc: Adrian Hunter <adrian.hunter@intel.com> Cc: David Ahern <dsahern@gmail.com> Cc: Jiri Olsa <jolsa@kernel.org> Cc: Namhyung Kim <namhyung@kernel.org> Cc: Wang Nan <wangnan0@huawei.com> Link: https://lkml.kernel.org/n/tip-5nl8l4hx1tl9pqdx65nkp6pw@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com> --- .../examples/bpf/augmented_raw_syscalls.c | 59 +++++++++++++++++++ 1 file changed, 59 insertions(+) create mode 100644 tools/perf/examples/bpf/augmented_raw_syscalls.c diff --git a/tools/perf/examples/bpf/augmented_raw_syscalls.c b/tools/perf/examples/bpf/augmented_raw_syscalls.c new file mode 100644 index 0000000000000..cde91c34b1016 --- /dev/null +++ b/tools/perf/examples/bpf/augmented_raw_syscalls.c @@ -0,0 +1,59 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Augment the raw_syscalls tracepoints with the contents of the pointer arguments. + * + * Test it with: + * + * perf trace -e tools/perf/examples/bpf/augmented_raw_syscalls.c cat /etc/passwd > /dev/null + * + * This exactly matches what is marshalled into the raw_syscall:sys_enter + * payload expected by the 'perf trace' beautifiers. + * + * For now it just uses the existing tracepoint augmentation code in 'perf + * trace', in the next csets we'll hook up these with the sys_enter/sys_exit + * code that will combine entry/exit in a strace like way. + */ + +#include <stdio.h> +#include <linux/socket.h> + +/* bpf-output associated map */ +struct bpf_map SEC("maps") __augmented_syscalls__ = { + .type = BPF_MAP_TYPE_PERF_EVENT_ARRAY, + .key_size = sizeof(int), + .value_size = sizeof(u32), + .max_entries = __NR_CPUS__, +}; + +struct syscall_enter_args { + unsigned long long common_tp_fields; + long syscall_nr; + unsigned long args[6]; +}; + +struct syscall_exit_args { + unsigned long long common_tp_fields; + long syscall_nr; + long ret; +}; + +SEC("raw_syscalls:sys_enter") +int sys_enter(struct syscall_enter_args *args) +{ + struct { + struct syscall_enter_args args; + } augmented_args; + unsigned int len = sizeof(augmented_args); + + probe_read(&augmented_args.args, sizeof(augmented_args.args), args); + perf_event_output(args, &__augmented_syscalls__, BPF_F_CURRENT_CPU, &augmented_args, len); + return 0; +} + +SEC("raw_syscalls:sys_exit") +int sys_exit(struct syscall_exit_args *args) +{ + return 1; /* 0 as soon as we start copying data returned by the kernel, e.g. 'read' */ +} + +license(GPL); From 3c5e3dabf3722a883227623a4adf61976c2224ff Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo <acme@redhat.com> Date: Thu, 1 Nov 2018 13:50:35 -0300 Subject: [PATCH 025/443] perf trace: When augmenting raw_syscalls plug raw_syscalls:sys_exit too With just this commit we get to support all syscalls via hooking raw_syscalls:sys_{enter,exit} to the trace__sys_{enter,exit} routines to combine, strace-like, those tracepoints. # trace -e tools/perf/examples/bpf/augmented_raw_syscalls.c sleep 1 ? ( ): sleep/31680 ... [continued]: execve()) = 0 0.043 ( 0.004 ms): sleep/31680 brk() = 0x55652a851000 0.070 ( 0.009 ms): sleep/31680 access(filename:, mode: R) = -1 ENOENT No such file or directory 0.087 ( 0.006 ms): sleep/31680 openat(dfd: CWD, filename: , flags: CLOEXEC) = 3 0.096 ( 0.003 ms): sleep/31680 fstat(fd: 3, statbuf: 0x7ffc5269e190) = 0 0.101 ( 0.005 ms): sleep/31680 mmap(len: 103334, prot: READ, flags: PRIVATE, fd: 3) = 0x7f709c239000 0.109 ( 0.002 ms): sleep/31680 close(fd: 3) = 0 0.126 ( 0.006 ms): sleep/31680 openat(dfd: CWD, filename: , flags: CLOEXEC) = 3 0.135 ( 0.003 ms): sleep/31680 read(fd: 3, buf: 0x7ffc5269e358, count: 832) = 832 0.141 ( 0.002 ms): sleep/31680 fstat(fd: 3, statbuf: 0x7ffc5269e1f0) = 0 0.146 ( 0.005 ms): sleep/31680 mmap(len: 8192, prot: READ|WRITE, flags: PRIVATE|ANONYMOUS) = 0x7f709c237000 0.159 ( 0.007 ms): sleep/31680 mmap(len: 3889792, prot: EXEC|READ, flags: PRIVATE|DENYWRITE, fd: 3) = 0x7f709bc79000 0.168 ( 0.009 ms): sleep/31680 mprotect(start: 0x7f709be26000, len: 2093056) = 0 0.179 ( 0.010 ms): sleep/31680 mmap(addr: 0x7f709c025000, len: 24576, prot: READ|WRITE, flags: PRIVATE|FIXED|DENYWRITE, fd: 3, off: 1753088) = 0x7f709c025000 0.196 ( 0.005 ms): sleep/31680 mmap(addr: 0x7f709c02b000, len: 14976, prot: READ|WRITE, flags: PRIVATE|FIXED|ANONYMOUS) = 0x7f709c02b000 0.210 ( 0.002 ms): sleep/31680 close(fd: 3) = 0 0.230 ( 0.002 ms): sleep/31680 arch_prctl(option: 4098, arg2: 140121632638208) = 0 0.306 ( 0.009 ms): sleep/31680 mprotect(start: 0x7f709c025000, len: 16384, prot: READ) = 0 0.338 ( 0.005 ms): sleep/31680 mprotect(start: 0x556529607000, len: 4096, prot: READ) = 0 0.348 ( 0.005 ms): sleep/31680 mprotect(start: 0x7f709c253000, len: 4096, prot: READ) = 0 0.356 ( 0.019 ms): sleep/31680 munmap(addr: 0x7f709c239000, len: 103334) = 0 0.463 ( 0.002 ms): sleep/31680 brk() = 0x55652a851000 0.468 ( 0.004 ms): sleep/31680 brk(brk: 0x55652a872000) = 0x55652a872000 0.474 ( 0.002 ms): sleep/31680 brk() = 0x55652a872000 0.484 ( 0.008 ms): sleep/31680 open(filename: , flags: CLOEXEC) = 3 0.497 ( 0.002 ms): sleep/31680 fstat(fd: 3, statbuf: 0x7f709c02aaa0) = 0 0.501 ( 0.006 ms): sleep/31680 mmap(len: 113045344, prot: READ, flags: PRIVATE, fd: 3) = 0x7f70950aa000 0.514 ( 0.002 ms): sleep/31680 close(fd: 3) = 0 0.554 (1000.140 ms): sleep/31680 nanosleep(rqtp: 0x7ffc5269eed0) = 0 1000.734 ( 0.007 ms): sleep/31680 close(fd: 1) = 0 1000.748 ( 0.004 ms): sleep/31680 close(fd: 2) = 0 1000.769 ( ): sleep/31680 exit_group() # Now to allow selecting which syscalls should be traced, using a map. Cc: Adrian Hunter <adrian.hunter@intel.com> Cc: David Ahern <dsahern@gmail.com> Cc: Jiri Olsa <jolsa@kernel.org> Cc: Namhyung Kim <namhyung@kernel.org> Cc: Wang Nan <wangnan0@huawei.com> Link: https://lkml.kernel.org/n/tip-votqqmqhag8e1i9mgyzfez3o@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com> --- tools/perf/builtin-trace.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c index dc8a6c4986ce2..f582ca575883e 100644 --- a/tools/perf/builtin-trace.c +++ b/tools/perf/builtin-trace.c @@ -3501,7 +3501,8 @@ int cmd_trace(int argc, const char **argv) evsel->handler = trace__sys_enter; evlist__for_each_entry(trace.evlist, evsel) { - if (strstarts(perf_evsel__name(evsel), "syscalls:sys_exit_")) { + if (strstarts(perf_evsel__name(evsel), "syscalls:sys_exit_") || + strcmp(perf_evsel__name(evsel), "raw_syscalls:sys_exit") == 0) { perf_evsel__init_augmented_syscall_tp(evsel); perf_evsel__init_augmented_syscall_tp_ret(evsel); evsel->handler = trace__sys_exit; From 7de414a9dd91426318df7b63da024b2b07e53df5 Mon Sep 17 00:00:00 2001 From: Cong Wang <xiyou.wangcong@gmail.com> Date: Thu, 1 Nov 2018 12:02:37 -0700 Subject: [PATCH 026/443] net: drop skb on failure in ip_check_defrag() Most callers of pskb_trim_rcsum() simply drop the skb when it fails, however, ip_check_defrag() still continues to pass the skb up to stack. This is suspicious. In ip_check_defrag(), after we learn the skb is an IP fragment, passing the skb to callers makes no sense, because callers expect fragments are defrag'ed on success. So, dropping the skb when we can't defrag it is reasonable. Note, prior to commit 88078d98d1bb, this is not a big problem as checksum will be fixed up anyway. After it, the checksum is not correct on failure. Found this during code review. Fixes: 88078d98d1bb ("net: pskb_trim_rcsum() and CHECKSUM_COMPLETE are friends") Cc: Eric Dumazet <edumazet@google.com> Signed-off-by: Cong Wang <xiyou.wangcong@gmail.com> Reviewed-by: Eric Dumazet <edumazet@google.com> Signed-off-by: David S. Miller <davem@davemloft.net> --- net/ipv4/ip_fragment.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c index 9b0158fa431f2..d6ee343fdb864 100644 --- a/net/ipv4/ip_fragment.c +++ b/net/ipv4/ip_fragment.c @@ -722,10 +722,14 @@ struct sk_buff *ip_check_defrag(struct net *net, struct sk_buff *skb, u32 user) if (ip_is_fragment(&iph)) { skb = skb_share_check(skb, GFP_ATOMIC); if (skb) { - if (!pskb_may_pull(skb, netoff + iph.ihl * 4)) - return skb; - if (pskb_trim_rcsum(skb, netoff + len)) - return skb; + if (!pskb_may_pull(skb, netoff + iph.ihl * 4)) { + kfree_skb(skb); + return NULL; + } + if (pskb_trim_rcsum(skb, netoff + len)) { + kfree_skb(skb); + return NULL; + } memset(IPCB(skb), 0, sizeof(struct inet_skb_parm)); if (ip_defrag(net, skb, user)) return NULL; From a8874e7e8a8896f2b6c641f4b8e2473eafd35204 Mon Sep 17 00:00:00 2001 From: Martin Schwidefsky <schwidefsky@de.ibm.com> Date: Wed, 31 Oct 2018 12:11:48 +0100 Subject: [PATCH 027/443] mm: make the __PAGETABLE_PxD_FOLDED defines non-empty Change the currently empty defines for __PAGETABLE_PMD_FOLDED, __PAGETABLE_PUD_FOLDED and __PAGETABLE_P4D_FOLDED to return 1. This makes it possible to use __is_defined() to test if the preprocessor define exists. Acked-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com> Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com> --- arch/arm/include/asm/pgtable-2level.h | 2 +- arch/m68k/include/asm/pgtable_mm.h | 4 ++-- arch/microblaze/include/asm/pgtable.h | 2 +- arch/nds32/include/asm/pgtable.h | 2 +- arch/parisc/include/asm/pgtable.h | 2 +- include/asm-generic/4level-fixup.h | 2 +- include/asm-generic/5level-fixup.h | 2 +- include/asm-generic/pgtable-nop4d-hack.h | 2 +- include/asm-generic/pgtable-nop4d.h | 2 +- include/asm-generic/pgtable-nopmd.h | 2 +- include/asm-generic/pgtable-nopud.h | 2 +- 11 files changed, 12 insertions(+), 12 deletions(-) diff --git a/arch/arm/include/asm/pgtable-2level.h b/arch/arm/include/asm/pgtable-2level.h index 92fd2c8a9af06..12659ce5c1f38 100644 --- a/arch/arm/include/asm/pgtable-2level.h +++ b/arch/arm/include/asm/pgtable-2level.h @@ -10,7 +10,7 @@ #ifndef _ASM_PGTABLE_2LEVEL_H #define _ASM_PGTABLE_2LEVEL_H -#define __PAGETABLE_PMD_FOLDED +#define __PAGETABLE_PMD_FOLDED 1 /* * Hardware-wise, we have a two level page table structure, where the first diff --git a/arch/m68k/include/asm/pgtable_mm.h b/arch/m68k/include/asm/pgtable_mm.h index 6181e4134483c..fe3ddd73a0ccb 100644 --- a/arch/m68k/include/asm/pgtable_mm.h +++ b/arch/m68k/include/asm/pgtable_mm.h @@ -55,12 +55,12 @@ */ #ifdef CONFIG_SUN3 #define PTRS_PER_PTE 16 -#define __PAGETABLE_PMD_FOLDED +#define __PAGETABLE_PMD_FOLDED 1 #define PTRS_PER_PMD 1 #define PTRS_PER_PGD 2048 #elif defined(CONFIG_COLDFIRE) #define PTRS_PER_PTE 512 -#define __PAGETABLE_PMD_FOLDED +#define __PAGETABLE_PMD_FOLDED 1 #define PTRS_PER_PMD 1 #define PTRS_PER_PGD 1024 #else diff --git a/arch/microblaze/include/asm/pgtable.h b/arch/microblaze/include/asm/pgtable.h index f64ebb9c9a413..e14b6621c933e 100644 --- a/arch/microblaze/include/asm/pgtable.h +++ b/arch/microblaze/include/asm/pgtable.h @@ -63,7 +63,7 @@ extern int mem_init_done; #include <asm-generic/4level-fixup.h> -#define __PAGETABLE_PMD_FOLDED +#define __PAGETABLE_PMD_FOLDED 1 #ifdef __KERNEL__ #ifndef __ASSEMBLY__ diff --git a/arch/nds32/include/asm/pgtable.h b/arch/nds32/include/asm/pgtable.h index d3e19a55cf530..9f52db930c004 100644 --- a/arch/nds32/include/asm/pgtable.h +++ b/arch/nds32/include/asm/pgtable.h @@ -4,7 +4,7 @@ #ifndef _ASMNDS32_PGTABLE_H #define _ASMNDS32_PGTABLE_H -#define __PAGETABLE_PMD_FOLDED +#define __PAGETABLE_PMD_FOLDED 1 #include <asm-generic/4level-fixup.h> #include <asm-generic/sizes.h> diff --git a/arch/parisc/include/asm/pgtable.h b/arch/parisc/include/asm/pgtable.h index b941ac7d4e70b..c7bb74e224360 100644 --- a/arch/parisc/include/asm/pgtable.h +++ b/arch/parisc/include/asm/pgtable.h @@ -111,7 +111,7 @@ static inline void purge_tlb_entries(struct mm_struct *mm, unsigned long addr) #if CONFIG_PGTABLE_LEVELS == 3 #define BITS_PER_PMD (PAGE_SHIFT + PMD_ORDER - BITS_PER_PMD_ENTRY) #else -#define __PAGETABLE_PMD_FOLDED +#define __PAGETABLE_PMD_FOLDED 1 #define BITS_PER_PMD 0 #endif #define PTRS_PER_PMD (1UL << BITS_PER_PMD) diff --git a/include/asm-generic/4level-fixup.h b/include/asm-generic/4level-fixup.h index 89f3b03b14451..e3667c9a33a5d 100644 --- a/include/asm-generic/4level-fixup.h +++ b/include/asm-generic/4level-fixup.h @@ -3,7 +3,7 @@ #define _4LEVEL_FIXUP_H #define __ARCH_HAS_4LEVEL_HACK -#define __PAGETABLE_PUD_FOLDED +#define __PAGETABLE_PUD_FOLDED 1 #define PUD_SHIFT PGDIR_SHIFT #define PUD_SIZE PGDIR_SIZE diff --git a/include/asm-generic/5level-fixup.h b/include/asm-generic/5level-fixup.h index 9c2e0708eb82f..73474bb52344d 100644 --- a/include/asm-generic/5level-fixup.h +++ b/include/asm-generic/5level-fixup.h @@ -3,7 +3,7 @@ #define _5LEVEL_FIXUP_H #define __ARCH_HAS_5LEVEL_HACK -#define __PAGETABLE_P4D_FOLDED +#define __PAGETABLE_P4D_FOLDED 1 #define P4D_SHIFT PGDIR_SHIFT #define P4D_SIZE PGDIR_SIZE diff --git a/include/asm-generic/pgtable-nop4d-hack.h b/include/asm-generic/pgtable-nop4d-hack.h index 0c34215263b8a..1d6dd38c0e5ea 100644 --- a/include/asm-generic/pgtable-nop4d-hack.h +++ b/include/asm-generic/pgtable-nop4d-hack.h @@ -5,7 +5,7 @@ #ifndef __ASSEMBLY__ #include <asm-generic/5level-fixup.h> -#define __PAGETABLE_PUD_FOLDED +#define __PAGETABLE_PUD_FOLDED 1 /* * Having the pud type consist of a pgd gets the size right, and allows diff --git a/include/asm-generic/pgtable-nop4d.h b/include/asm-generic/pgtable-nop4d.h index 1a29b2a0282bf..04cb913797bc0 100644 --- a/include/asm-generic/pgtable-nop4d.h +++ b/include/asm-generic/pgtable-nop4d.h @@ -4,7 +4,7 @@ #ifndef __ASSEMBLY__ -#define __PAGETABLE_P4D_FOLDED +#define __PAGETABLE_P4D_FOLDED 1 typedef struct { pgd_t pgd; } p4d_t; diff --git a/include/asm-generic/pgtable-nopmd.h b/include/asm-generic/pgtable-nopmd.h index f35f6e8149e47..b85b8271a73de 100644 --- a/include/asm-generic/pgtable-nopmd.h +++ b/include/asm-generic/pgtable-nopmd.h @@ -8,7 +8,7 @@ struct mm_struct; -#define __PAGETABLE_PMD_FOLDED +#define __PAGETABLE_PMD_FOLDED 1 /* * Having the pmd type consist of a pud gets the size right, and allows diff --git a/include/asm-generic/pgtable-nopud.h b/include/asm-generic/pgtable-nopud.h index e950b9c50f34f..9bef475db6fef 100644 --- a/include/asm-generic/pgtable-nopud.h +++ b/include/asm-generic/pgtable-nopud.h @@ -9,7 +9,7 @@ #else #include <asm-generic/pgtable-nop4d.h> -#define __PAGETABLE_PUD_FOLDED +#define __PAGETABLE_PUD_FOLDED 1 /* * Having the pud type consist of a p4d gets the size right, and allows From 1071fc5779d9846fec56a4ff6089ab08cac1ab72 Mon Sep 17 00:00:00 2001 From: Martin Schwidefsky <schwidefsky@de.ibm.com> Date: Mon, 15 Oct 2018 10:25:57 +0200 Subject: [PATCH 028/443] mm: introduce mm_[p4d|pud|pmd]_folded Add three architecture overrideable functions to test if the p4d, pud, or pmd layer of a page table is folded or not. Acked-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com> Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com> --- include/asm-generic/pgtable.h | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/include/asm-generic/pgtable.h b/include/asm-generic/pgtable.h index 88ebc6102c7cc..15fd0277ffa69 100644 --- a/include/asm-generic/pgtable.h +++ b/include/asm-generic/pgtable.h @@ -1127,4 +1127,20 @@ static inline bool arch_has_pfn_modify_check(void) #endif #endif +/* + * On some architectures it depends on the mm if the p4d/pud or pmd + * layer of the page table hierarchy is folded or not. + */ +#ifndef mm_p4d_folded +#define mm_p4d_folded(mm) __is_defined(__PAGETABLE_P4D_FOLDED) +#endif + +#ifndef mm_pud_folded +#define mm_pud_folded(mm) __is_defined(__PAGETABLE_PUD_FOLDED) +#endif + +#ifndef mm_pmd_folded +#define mm_pmd_folded(mm) __is_defined(__PAGETABLE_PMD_FOLDED) +#endif + #endif /* _ASM_GENERIC_PGTABLE_H */ From 6d212db11947ae5464e4717536ed9faf61c01e86 Mon Sep 17 00:00:00 2001 From: Martin Schwidefsky <schwidefsky@de.ibm.com> Date: Mon, 15 Oct 2018 10:30:23 +0200 Subject: [PATCH 029/443] mm: add mm_pxd_folded checks to pgtable_bytes accounting functions The common mm code calls mm_dec_nr_pmds() and mm_dec_nr_puds() in free_pgtables() if the address range spans a full pud or pmd. If mm_dec_nr_puds/mm_dec_nr_pmds are non-empty due to configuration settings they blindly subtract the size of the pmd or pud table from pgtable_bytes even if the pud or pmd page table layer is folded. Add explicit mm_[pmd|pud]_folded checks to the four pgtable_bytes accounting functions mm_inc_nr_puds, mm_inc_nr_pmds, mm_dec_nr_puds and mm_dec_nr_pmds. As the check for folded page tables can be overwritten by the architecture, this allows to keep a correct pgtable_bytes value for platforms that use a dynamic number of page table levels. Acked-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com> Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com> --- include/linux/mm.h | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/include/linux/mm.h b/include/linux/mm.h index daa2b8f1e9a88..a3701e91bb576 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1742,11 +1742,15 @@ int __pud_alloc(struct mm_struct *mm, p4d_t *p4d, unsigned long address); static inline void mm_inc_nr_puds(struct mm_struct *mm) { + if (mm_pud_folded(mm)) + return; atomic_long_add(PTRS_PER_PUD * sizeof(pud_t), &mm->pgtables_bytes); } static inline void mm_dec_nr_puds(struct mm_struct *mm) { + if (mm_pud_folded(mm)) + return; atomic_long_sub(PTRS_PER_PUD * sizeof(pud_t), &mm->pgtables_bytes); } #endif @@ -1766,11 +1770,15 @@ int __pmd_alloc(struct mm_struct *mm, pud_t *pud, unsigned long address); static inline void mm_inc_nr_pmds(struct mm_struct *mm) { + if (mm_pmd_folded(mm)) + return; atomic_long_add(PTRS_PER_PMD * sizeof(pmd_t), &mm->pgtables_bytes); } static inline void mm_dec_nr_pmds(struct mm_struct *mm) { + if (mm_pmd_folded(mm)) + return; atomic_long_sub(PTRS_PER_PMD * sizeof(pmd_t), &mm->pgtables_bytes); } #endif From e12e4044aede97974f2222eb7f0ed726a5179a32 Mon Sep 17 00:00:00 2001 From: Martin Schwidefsky <schwidefsky@de.ibm.com> Date: Mon, 15 Oct 2018 11:09:16 +0200 Subject: [PATCH 030/443] s390/mm: fix mis-accounting of pgtable_bytes In case a fork or a clone system fails in copy_process and the error handling does the mmput() at the bad_fork_cleanup_mm label, the following warning messages will appear on the console: BUG: non-zero pgtables_bytes on freeing mm: 16384 The reason for that is the tricks we play with mm_inc_nr_puds() and mm_inc_nr_pmds() in init_new_context(). A normal 64-bit process has 3 levels of page table, the p4d level and the pud level are folded. On process termination the free_pud_range() function in mm/memory.c will subtract 16KB from pgtable_bytes with a mm_dec_nr_puds() call, but there actually is not really a pud table. One issue with this is the fact that pgtable_bytes is usually off by a few kilobytes, but the more severe problem is that for a failed fork or clone the free_pgtables() function is not called. In this case there is no mm_dec_nr_puds() or mm_dec_nr_pmds() that go together with the mm_inc_nr_puds() and mm_inc_nr_pmds in init_new_context(). The pgtable_bytes will be off by 16384 or 32768 bytes and we get the BUG message. The message itself is purely cosmetic, but annoying. To fix this override the mm_pmd_folded, mm_pud_folded and mm_p4d_folded function to check for the true size of the address space. Reported-by: Li Wang <liwang@redhat.com> Tested-by: Li Wang <liwang@redhat.com> Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com> --- arch/s390/include/asm/mmu_context.h | 5 ----- arch/s390/include/asm/pgalloc.h | 6 +++--- arch/s390/include/asm/pgtable.h | 18 ++++++++++++++++++ arch/s390/include/asm/tlb.h | 6 +++--- arch/s390/mm/pgalloc.c | 1 + 5 files changed, 25 insertions(+), 11 deletions(-) diff --git a/arch/s390/include/asm/mmu_context.h b/arch/s390/include/asm/mmu_context.h index dbd689d556ce5..ccbb53e220240 100644 --- a/arch/s390/include/asm/mmu_context.h +++ b/arch/s390/include/asm/mmu_context.h @@ -46,8 +46,6 @@ static inline int init_new_context(struct task_struct *tsk, mm->context.asce_limit = STACK_TOP_MAX; mm->context.asce = __pa(mm->pgd) | _ASCE_TABLE_LENGTH | _ASCE_USER_BITS | _ASCE_TYPE_REGION3; - /* pgd_alloc() did not account this pud */ - mm_inc_nr_puds(mm); break; case -PAGE_SIZE: /* forked 5-level task, set new asce with new_mm->pgd */ @@ -63,9 +61,6 @@ static inline int init_new_context(struct task_struct *tsk, /* forked 2-level compat task, set new asce with new mm->pgd */ mm->context.asce = __pa(mm->pgd) | _ASCE_TABLE_LENGTH | _ASCE_USER_BITS | _ASCE_TYPE_SEGMENT; - /* pgd_alloc() did not account this pmd */ - mm_inc_nr_pmds(mm); - mm_inc_nr_puds(mm); } crst_table_init((unsigned long *) mm->pgd, pgd_entry_type(mm)); return 0; diff --git a/arch/s390/include/asm/pgalloc.h b/arch/s390/include/asm/pgalloc.h index f0f9bcf94c037..5ee733720a571 100644 --- a/arch/s390/include/asm/pgalloc.h +++ b/arch/s390/include/asm/pgalloc.h @@ -36,11 +36,11 @@ static inline void crst_table_init(unsigned long *crst, unsigned long entry) static inline unsigned long pgd_entry_type(struct mm_struct *mm) { - if (mm->context.asce_limit <= _REGION3_SIZE) + if (mm_pmd_folded(mm)) return _SEGMENT_ENTRY_EMPTY; - if (mm->context.asce_limit <= _REGION2_SIZE) + if (mm_pud_folded(mm)) return _REGION3_ENTRY_EMPTY; - if (mm->context.asce_limit <= _REGION1_SIZE) + if (mm_p4d_folded(mm)) return _REGION2_ENTRY_EMPTY; return _REGION1_ENTRY_EMPTY; } diff --git a/arch/s390/include/asm/pgtable.h b/arch/s390/include/asm/pgtable.h index 411d435e7a7d2..063732414dfbb 100644 --- a/arch/s390/include/asm/pgtable.h +++ b/arch/s390/include/asm/pgtable.h @@ -493,6 +493,24 @@ static inline int is_module_addr(void *addr) _REGION_ENTRY_PROTECT | \ _REGION_ENTRY_NOEXEC) +static inline bool mm_p4d_folded(struct mm_struct *mm) +{ + return mm->context.asce_limit <= _REGION1_SIZE; +} +#define mm_p4d_folded(mm) mm_p4d_folded(mm) + +static inline bool mm_pud_folded(struct mm_struct *mm) +{ + return mm->context.asce_limit <= _REGION2_SIZE; +} +#define mm_pud_folded(mm) mm_pud_folded(mm) + +static inline bool mm_pmd_folded(struct mm_struct *mm) +{ + return mm->context.asce_limit <= _REGION3_SIZE; +} +#define mm_pmd_folded(mm) mm_pmd_folded(mm) + static inline int mm_has_pgste(struct mm_struct *mm) { #ifdef CONFIG_PGSTE diff --git a/arch/s390/include/asm/tlb.h b/arch/s390/include/asm/tlb.h index 457b7ba0fbb66..b31c779cf5817 100644 --- a/arch/s390/include/asm/tlb.h +++ b/arch/s390/include/asm/tlb.h @@ -136,7 +136,7 @@ static inline void pte_free_tlb(struct mmu_gather *tlb, pgtable_t pte, static inline void pmd_free_tlb(struct mmu_gather *tlb, pmd_t *pmd, unsigned long address) { - if (tlb->mm->context.asce_limit <= _REGION3_SIZE) + if (mm_pmd_folded(tlb->mm)) return; pgtable_pmd_page_dtor(virt_to_page(pmd)); tlb_remove_table(tlb, pmd); @@ -152,7 +152,7 @@ static inline void pmd_free_tlb(struct mmu_gather *tlb, pmd_t *pmd, static inline void p4d_free_tlb(struct mmu_gather *tlb, p4d_t *p4d, unsigned long address) { - if (tlb->mm->context.asce_limit <= _REGION1_SIZE) + if (mm_p4d_folded(tlb->mm)) return; tlb_remove_table(tlb, p4d); } @@ -167,7 +167,7 @@ static inline void p4d_free_tlb(struct mmu_gather *tlb, p4d_t *p4d, static inline void pud_free_tlb(struct mmu_gather *tlb, pud_t *pud, unsigned long address) { - if (tlb->mm->context.asce_limit <= _REGION2_SIZE) + if (mm_pud_folded(tlb->mm)) return; tlb_remove_table(tlb, pud); } diff --git a/arch/s390/mm/pgalloc.c b/arch/s390/mm/pgalloc.c index 76d89ee8b4288..814f26520aa2c 100644 --- a/arch/s390/mm/pgalloc.c +++ b/arch/s390/mm/pgalloc.c @@ -101,6 +101,7 @@ int crst_table_upgrade(struct mm_struct *mm, unsigned long end) mm->context.asce_limit = _REGION1_SIZE; mm->context.asce = __pa(mm->pgd) | _ASCE_TABLE_LENGTH | _ASCE_USER_BITS | _ASCE_TYPE_REGION2; + mm_inc_nr_puds(mm); } else { crst_table_init(table, _REGION1_ENTRY_EMPTY); pgd_populate(mm, (pgd_t *) table, (p4d_t *) pgd); From c43e1c5a801fdde1aad0a2f9ed948753b5275d56 Mon Sep 17 00:00:00 2001 From: Thomas Richter <tmricht@linux.ibm.com> Date: Thu, 25 Oct 2018 09:04:05 +0100 Subject: [PATCH 031/443] s390/cpum_sf: Rework attribute definition for diagnostic sampling Previously, the attribute entry for diagnostic sampling was added if authorized. Otherwise, the array of struct attribute contains two NULL values. Change this logic and reserve space for the attribute for diagnostic sampling. If diagnostic sampling is authorized, add an entry in the respective position in the array of struct attribute. Signed-off-by: Thomas Richter <tmricht@linux.ibm.com> Reviewed-by: Hendrik Brueckner <brueckner@linux.ibm.com> Suggested-by: Heiko Carstens <heiko.carstens@de.ibm.com> Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com> --- arch/s390/kernel/perf_cpum_sf.c | 33 ++++++++++++++++++++++++++++----- 1 file changed, 28 insertions(+), 5 deletions(-) diff --git a/arch/s390/kernel/perf_cpum_sf.c b/arch/s390/kernel/perf_cpum_sf.c index 7bf604ff50a1b..bfabeb1889cc0 100644 --- a/arch/s390/kernel/perf_cpum_sf.c +++ b/arch/s390/kernel/perf_cpum_sf.c @@ -1842,10 +1842,30 @@ static void cpumsf_pmu_del(struct perf_event *event, int flags) CPUMF_EVENT_ATTR(SF, SF_CYCLES_BASIC, PERF_EVENT_CPUM_SF); CPUMF_EVENT_ATTR(SF, SF_CYCLES_BASIC_DIAG, PERF_EVENT_CPUM_SF_DIAG); -static struct attribute *cpumsf_pmu_events_attr[] = { - CPUMF_EVENT_PTR(SF, SF_CYCLES_BASIC), - NULL, - NULL, +/* Attribute list for CPU_SF. + * + * The availablitiy depends on the CPU_MF sampling facility authorization + * for basic + diagnositic samples. This is determined at initialization + * time by the sampling facility device driver. + * If the authorization for basic samples is turned off, it should be + * also turned off for diagnostic sampling. + * + * During initialization of the device driver, check the authorization + * level for diagnostic sampling and installs the attribute + * file for diagnostic sampling if necessary. + * + * For now install a placeholder to reference all possible attributes: + * SF_CYCLES_BASIC and SF_CYCLES_BASIC_DIAG. + * Add another entry for the final NULL pointer. + */ +enum { + SF_CYCLES_BASIC_ATTR_IDX = 0, + SF_CYCLES_BASIC_DIAG_ATTR_IDX, + SF_CYCLES_ATTR_MAX +}; + +static struct attribute *cpumsf_pmu_events_attr[SF_CYCLES_ATTR_MAX + 1] = { + [SF_CYCLES_BASIC_ATTR_IDX] = CPUMF_EVENT_PTR(SF, SF_CYCLES_BASIC) }; PMU_FORMAT_ATTR(event, "config:0-63"); @@ -2040,7 +2060,10 @@ static int __init init_cpum_sampling_pmu(void) if (si.ad) { sfb_set_limits(CPUM_SF_MIN_SDB, CPUM_SF_MAX_SDB); - cpumsf_pmu_events_attr[1] = + /* Sampling of diagnostic data authorized, + * install event into attribute list of PMU device. + */ + cpumsf_pmu_events_attr[SF_CYCLES_BASIC_DIAG_ATTR_IDX] = CPUMF_EVENT_PTR(SF, SF_CYCLES_BASIC_DIAG); } From 9fed920e6817218ad786c3f28e14b4c877cc2aed Mon Sep 17 00:00:00 2001 From: Vasily Gorbik <gor@linux.ibm.com> Date: Fri, 26 Oct 2018 15:29:59 +0200 Subject: [PATCH 032/443] s390/kasan: increase instrumented stack size to 64k Increase kasan instrumented kernel stack size from 32k to 64k. Other architectures seems to get away with just doubling kernel stack size under kasan, but on s390 this appears to be not enough due to bigger frame size. The particular pain point is kasan inlined checks (CONFIG_KASAN_INLINE vs CONFIG_KASAN_OUTLINE). With inlined checks one particular case hitting stack overflow is fs sync on xfs filesystem: #0 [9a0681e8] 704 bytes check_usage at 34b1fc #1 [9a0684a8] 432 bytes check_usage at 34c710 #2 [9a068658] 1048 bytes validate_chain at 35044a #3 [9a068a70] 312 bytes __lock_acquire at 3559fe #4 [9a068ba8] 440 bytes lock_acquire at 3576ee #5 [9a068d60] 104 bytes _raw_spin_lock at 21b44e0 #6 [9a068dc8] 1992 bytes enqueue_entity at 2dbf72 #7 [9a069590] 1496 bytes enqueue_task_fair at 2df5f0 #8 [9a069b68] 64 bytes ttwu_do_activate at 28f438 #9 [9a069ba8] 552 bytes try_to_wake_up at 298c4c #10 [9a069dd0] 168 bytes wake_up_worker at 23f97c #11 [9a069e78] 200 bytes insert_work at 23fc2e #12 [9a069f40] 648 bytes __queue_work at 2487c0 #13 [9a06a1c8] 200 bytes __queue_delayed_work at 24db28 #14 [9a06a290] 248 bytes mod_delayed_work_on at 24de84 #15 [9a06a388] 24 bytes kblockd_mod_delayed_work_on at 153e2a0 #16 [9a06a3a0] 288 bytes __blk_mq_delay_run_hw_queue at 158168c #17 [9a06a4c0] 192 bytes blk_mq_run_hw_queue at 1581a3c #18 [9a06a580] 184 bytes blk_mq_sched_insert_requests at 15a2192 #19 [9a06a638] 1024 bytes blk_mq_flush_plug_list at 1590f3a #20 [9a06aa38] 704 bytes blk_flush_plug_list at 1555028 #21 [9a06acf8] 320 bytes schedule at 219e476 #22 [9a06ae38] 760 bytes schedule_timeout at 21b0aac #23 [9a06b130] 408 bytes wait_for_common at 21a1706 #24 [9a06b2c8] 360 bytes xfs_buf_iowait at fa1540 #25 [9a06b430] 256 bytes __xfs_buf_submit at fadae6 #26 [9a06b530] 264 bytes xfs_buf_read_map at fae3f6 #27 [9a06b638] 656 bytes xfs_trans_read_buf_map at 10ac9a8 #28 [9a06b8c8] 304 bytes xfs_btree_kill_root at e72426 #29 [9a06b9f8] 288 bytes xfs_btree_lookup_get_block at e7bc5e #30 [9a06bb18] 624 bytes xfs_btree_lookup at e7e1a6 #31 [9a06bd88] 2664 bytes xfs_alloc_ag_vextent_near at dfa070 #32 [9a06c7f0] 144 bytes xfs_alloc_ag_vextent at dff3ca #33 [9a06c880] 1128 bytes xfs_alloc_vextent at e05fce #34 [9a06cce8] 584 bytes xfs_bmap_btalloc at e58342 #35 [9a06cf30] 1336 bytes xfs_bmapi_write at e618de #36 [9a06d468] 776 bytes xfs_iomap_write_allocate at ff678e #37 [9a06d770] 720 bytes xfs_map_blocks at f82af8 #38 [9a06da40] 928 bytes xfs_writepage_map at f83cd6 #39 [9a06dde0] 320 bytes xfs_do_writepage at f85872 #40 [9a06df20] 1320 bytes write_cache_pages at 73dfe8 #41 [9a06e448] 208 bytes xfs_vm_writepages at f7f892 #42 [9a06e518] 88 bytes do_writepages at 73fe6a #43 [9a06e570] 872 bytes __writeback_single_inode at a20cb6 #44 [9a06e8d8] 664 bytes writeback_sb_inodes at a23be2 #45 [9a06eb70] 296 bytes __writeback_inodes_wb at a242e0 #46 [9a06ec98] 928 bytes wb_writeback at a2500e #47 [9a06f038] 848 bytes wb_do_writeback at a260ae #48 [9a06f388] 536 bytes wb_workfn at a28228 #49 [9a06f5a0] 1088 bytes process_one_work at 24a234 #50 [9a06f9e0] 1120 bytes worker_thread at 24ba26 #51 [9a06fe40] 104 bytes kthread at 26545a #52 [9a06fea8] kernel_thread_starter at 21b6b62 To be able to increase the stack size to 64k reuse LLILL instruction in __switch_to function to load 64k - STACK_FRAME_OVERHEAD - __PT_SIZE (65192) value as unsigned. Reported-by: Benjamin Block <bblock@linux.ibm.com> Reviewed-by: Heiko Carstens <heiko.carstens@de.ibm.com> Signed-off-by: Vasily Gorbik <gor@linux.ibm.com> Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com> --- arch/s390/Makefile | 2 +- arch/s390/include/asm/thread_info.h | 2 +- arch/s390/kernel/entry.S | 6 +++--- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/arch/s390/Makefile b/arch/s390/Makefile index 0b33577932c3b..e21053e5e0da2 100644 --- a/arch/s390/Makefile +++ b/arch/s390/Makefile @@ -27,7 +27,7 @@ KBUILD_CFLAGS_DECOMPRESSOR += $(call cc-option,-ffreestanding) KBUILD_CFLAGS_DECOMPRESSOR += $(if $(CONFIG_DEBUG_INFO),-g) KBUILD_CFLAGS_DECOMPRESSOR += $(if $(CONFIG_DEBUG_INFO_DWARF4), $(call cc-option, -gdwarf-4,)) UTS_MACHINE := s390x -STACK_SIZE := $(if $(CONFIG_KASAN),32768,16384) +STACK_SIZE := $(if $(CONFIG_KASAN),65536,16384) CHECKFLAGS += -D__s390__ -D__s390x__ export LD_BFD diff --git a/arch/s390/include/asm/thread_info.h b/arch/s390/include/asm/thread_info.h index 27248f42a03c4..ce4e17c9aad6f 100644 --- a/arch/s390/include/asm/thread_info.h +++ b/arch/s390/include/asm/thread_info.h @@ -14,7 +14,7 @@ * General size of kernel stacks */ #ifdef CONFIG_KASAN -#define THREAD_SIZE_ORDER 3 +#define THREAD_SIZE_ORDER 4 #else #define THREAD_SIZE_ORDER 2 #endif diff --git a/arch/s390/kernel/entry.S b/arch/s390/kernel/entry.S index 724fba4d09d2d..39191a0feed1c 100644 --- a/arch/s390/kernel/entry.S +++ b/arch/s390/kernel/entry.S @@ -236,10 +236,10 @@ ENTRY(__switch_to) stmg %r6,%r15,__SF_GPRS(%r15) # store gprs of prev task lghi %r4,__TASK_stack lghi %r1,__TASK_thread - lg %r5,0(%r4,%r3) # start of kernel stack of next + llill %r5,STACK_INIT stg %r15,__THREAD_ksp(%r1,%r2) # store kernel stack of prev - lgr %r15,%r5 - aghi %r15,STACK_INIT # end of kernel stack of next + lg %r15,0(%r4,%r3) # start of kernel stack of next + agr %r15,%r5 # end of kernel stack of next stg %r3,__LC_CURRENT # store task struct of next stg %r15,__LC_KERNEL_STACK # store end of kernel stack lg %r15,__THREAD_ksp(%r1,%r3) # load kernel stack of next From a541f0ebcc08ed8bc0cc492eec9a86cb280a9f24 Mon Sep 17 00:00:00 2001 From: "Justin M. Forbes" <jforbes@fedoraproject.org> Date: Wed, 31 Oct 2018 13:02:03 -0500 Subject: [PATCH 033/443] s390/mm: Fix ERROR: "__node_distance" undefined! Fixes: ERROR: "__node_distance" [drivers/nvme/host/nvme-core.ko] undefined! make[1]: *** [scripts/Makefile.modpost:92: __modpost] Error 1 make: *** [Makefile:1275: modules] Error 2 + exit 1 Signed-off-by: Justin M. Forbes <jforbes@fedoraproject.org> Signed-off-by: Heiko Carstens <heiko.carstens@de.ibm.com> Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com> --- arch/s390/numa/numa.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/s390/numa/numa.c b/arch/s390/numa/numa.c index 5bd374491f946..6c151b42e65db 100644 --- a/arch/s390/numa/numa.c +++ b/arch/s390/numa/numa.c @@ -54,6 +54,7 @@ int __node_distance(int a, int b) { return mode->distance ? mode->distance(a, b) : 0; } +EXPORT_SYMBOL(__node_distance); int numa_debug_enabled; From f9005571701920551bcf54a500973fb61f2e1eda Mon Sep 17 00:00:00 2001 From: Stefano Stabellini <stefanos@xilinx.com> Date: Wed, 31 Oct 2018 16:11:49 -0700 Subject: [PATCH 034/443] CONFIG_XEN_PV breaks xen_create_contiguous_region on ARM xen_create_contiguous_region has now only an implementation if CONFIG_XEN_PV is defined. However, on ARM we never set CONFIG_XEN_PV but we do have an implementation of xen_create_contiguous_region which is required for swiotlb-xen to work correctly (although it just sets *dma_handle). Cc: <stable@vger.kernel.org> # 4.12 Fixes: 16624390816c ("xen: create xen_create/destroy_contiguous_region() stubs for PVHVM only builds") Signed-off-by: Stefano Stabellini <stefanos@xilinx.com> Reviewed-by: Juergen Gross <jgross@suse.com> CC: Jeff.Kubascik@dornerworks.com CC: Jarvis.Roach@dornerworks.com CC: Nathan.Studer@dornerworks.com CC: vkuznets@redhat.com CC: boris.ostrovsky@oracle.com CC: jgross@suse.com CC: julien.grall@arm.com Signed-off-by: Juergen Gross <jgross@suse.com> --- include/xen/xen-ops.h | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/include/xen/xen-ops.h b/include/xen/xen-ops.h index 18803ff76e278..4969817124a8d 100644 --- a/include/xen/xen-ops.h +++ b/include/xen/xen-ops.h @@ -42,16 +42,12 @@ int xen_setup_shutdown_event(void); extern unsigned long *xen_contiguous_bitmap; -#ifdef CONFIG_XEN_PV +#if defined(CONFIG_XEN_PV) || defined(CONFIG_ARM) || defined(CONFIG_ARM64) int xen_create_contiguous_region(phys_addr_t pstart, unsigned int order, unsigned int address_bits, dma_addr_t *dma_handle); void xen_destroy_contiguous_region(phys_addr_t pstart, unsigned int order); - -int xen_remap_pfn(struct vm_area_struct *vma, unsigned long addr, - xen_pfn_t *pfn, int nr, int *err_ptr, pgprot_t prot, - unsigned int domid, bool no_translate, struct page **pages); #else static inline int xen_create_contiguous_region(phys_addr_t pstart, unsigned int order, @@ -63,7 +59,13 @@ static inline int xen_create_contiguous_region(phys_addr_t pstart, static inline void xen_destroy_contiguous_region(phys_addr_t pstart, unsigned int order) { } +#endif +#if defined(CONFIG_XEN_PV) +int xen_remap_pfn(struct vm_area_struct *vma, unsigned long addr, + xen_pfn_t *pfn, int nr, int *err_ptr, pgprot_t prot, + unsigned int domid, bool no_translate, struct page **pages); +#else static inline int xen_remap_pfn(struct vm_area_struct *vma, unsigned long addr, xen_pfn_t *pfn, int nr, int *err_ptr, pgprot_t prot, unsigned int domid, From df0734702a7cbba49d6765bd5ba069340bf9c5db Mon Sep 17 00:00:00 2001 From: Song Liu <songliubraving@fb.com> Date: Fri, 2 Nov 2018 10:16:15 -0700 Subject: [PATCH 035/443] bpf: show real jited prog address in /proc/kallsyms Currently, /proc/kallsyms shows page address of jited bpf program. The main reason here is to not expose randomized start address. However, This is not ideal for detailed profiling (find hot instructions from stack traces). This patch replaces the page address with real prog start address. This change is OK because these addresses are still protected by sysctl kptr_restrict (see kallsyms_show_value()), and only programs loaded by root are added to kallsyms (see bpf_prog_kallsyms_add()). Signed-off-by: Song Liu <songliubraving@fb.com> Signed-off-by: Daniel Borkmann <daniel@iogearbox.net> --- kernel/bpf/core.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c index 6377225b20820..1a796e0799ec4 100644 --- a/kernel/bpf/core.c +++ b/kernel/bpf/core.c @@ -553,7 +553,6 @@ bool is_bpf_text_address(unsigned long addr) int bpf_get_kallsym(unsigned int symnum, unsigned long *value, char *type, char *sym) { - unsigned long symbol_start, symbol_end; struct bpf_prog_aux *aux; unsigned int it = 0; int ret = -ERANGE; @@ -566,10 +565,9 @@ int bpf_get_kallsym(unsigned int symnum, unsigned long *value, char *type, if (it++ != symnum) continue; - bpf_get_prog_addr_region(aux->prog, &symbol_start, &symbol_end); bpf_get_prog_name(aux->prog, sym); - *value = symbol_start; + *value = (unsigned long)aux->prog->bpf_func; *type = BPF_SYM_ELF_TYPE; ret = 0; From de57e99ceb65d0d7775cc14a8ba5931d7de1d708 Mon Sep 17 00:00:00 2001 From: Song Liu <songliubraving@fb.com> Date: Fri, 2 Nov 2018 10:16:16 -0700 Subject: [PATCH 036/443] bpf: show real jited address in bpf_prog_info->jited_ksyms Currently, jited_ksyms in bpf_prog_info shows page addresses of jited bpf program. The main reason here is to not expose randomized start address. However, this is not ideal for detailed profiling (find hot instructions from stack traces). This patch replaces the page address with real prog start address. This change is OK because bpf_prog_get_info_by_fd() is only available to root. Signed-off-by: Song Liu <songliubraving@fb.com> Signed-off-by: Daniel Borkmann <daniel@iogearbox.net> --- kernel/bpf/syscall.c | 1 - 1 file changed, 1 deletion(-) diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index ccb93277aae2c..34a9eef5992c2 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -2172,7 +2172,6 @@ static int bpf_prog_get_info_by_fd(struct bpf_prog *prog, user_ksyms = u64_to_user_ptr(info.jited_ksyms); for (i = 0; i < ulen; i++) { ksym_addr = (ulong) prog->aux->func[i]->bpf_func; - ksym_addr &= PAGE_MASK; if (put_user((u64) ksym_addr, &user_ksyms[i])) return -EFAULT; } From ff1889fc531f582f902175c0acc80321af540b24 Mon Sep 17 00:00:00 2001 From: Song Liu <songliubraving@fb.com> Date: Fri, 2 Nov 2018 10:16:17 -0700 Subject: [PATCH 037/443] bpf: show main program address and length in bpf_prog_info Currently, when there is no subprog (prog->aux->func_cnt == 0), bpf_prog_info does not return any jited_ksyms or jited_func_lens. This patch adds main program address (prog->bpf_func) and main program length (prog->jited_len) to bpf_prog_info. Signed-off-by: Song Liu <songliubraving@fb.com> Signed-off-by: Daniel Borkmann <daniel@iogearbox.net> --- kernel/bpf/syscall.c | 33 ++++++++++++++++++++++++--------- 1 file changed, 24 insertions(+), 9 deletions(-) diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index 34a9eef5992c2..9418174c276c7 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -2158,11 +2158,11 @@ static int bpf_prog_get_info_by_fd(struct bpf_prog *prog, } ulen = info.nr_jited_ksyms; - info.nr_jited_ksyms = prog->aux->func_cnt; + info.nr_jited_ksyms = prog->aux->func_cnt ? : 1; if (info.nr_jited_ksyms && ulen) { if (bpf_dump_raw_ok()) { + unsigned long ksym_addr; u64 __user *user_ksyms; - ulong ksym_addr; u32 i; /* copy the address of the kernel symbol @@ -2170,9 +2170,17 @@ static int bpf_prog_get_info_by_fd(struct bpf_prog *prog, */ ulen = min_t(u32, info.nr_jited_ksyms, ulen); user_ksyms = u64_to_user_ptr(info.jited_ksyms); - for (i = 0; i < ulen; i++) { - ksym_addr = (ulong) prog->aux->func[i]->bpf_func; - if (put_user((u64) ksym_addr, &user_ksyms[i])) + if (prog->aux->func_cnt) { + for (i = 0; i < ulen; i++) { + ksym_addr = (unsigned long) + prog->aux->func[i]->bpf_func; + if (put_user((u64) ksym_addr, + &user_ksyms[i])) + return -EFAULT; + } + } else { + ksym_addr = (unsigned long) prog->bpf_func; + if (put_user((u64) ksym_addr, &user_ksyms[0])) return -EFAULT; } } else { @@ -2181,7 +2189,7 @@ static int bpf_prog_get_info_by_fd(struct bpf_prog *prog, } ulen = info.nr_jited_func_lens; - info.nr_jited_func_lens = prog->aux->func_cnt; + info.nr_jited_func_lens = prog->aux->func_cnt ? : 1; if (info.nr_jited_func_lens && ulen) { if (bpf_dump_raw_ok()) { u32 __user *user_lens; @@ -2190,9 +2198,16 @@ static int bpf_prog_get_info_by_fd(struct bpf_prog *prog, /* copy the JITed image lengths for each function */ ulen = min_t(u32, info.nr_jited_func_lens, ulen); user_lens = u64_to_user_ptr(info.jited_func_lens); - for (i = 0; i < ulen; i++) { - func_len = prog->aux->func[i]->jited_len; - if (put_user(func_len, &user_lens[i])) + if (prog->aux->func_cnt) { + for (i = 0; i < ulen; i++) { + func_len = + prog->aux->func[i]->jited_len; + if (put_user(func_len, &user_lens[i])) + return -EFAULT; + } + } else { + func_len = prog->jited_len; + if (put_user(func_len, &user_lens[0])) return -EFAULT; } } else { From 28c2fae726bf5003cd209b0d5910a642af98316f Mon Sep 17 00:00:00 2001 From: Daniel Borkmann <daniel@iogearbox.net> Date: Fri, 2 Nov 2018 11:35:46 +0100 Subject: [PATCH 038/443] bpf: fix bpf_prog_get_info_by_fd to return 0 func_lens for unpriv While dbecd7388476 ("bpf: get kernel symbol addresses via syscall") zeroed info.nr_jited_ksyms in bpf_prog_get_info_by_fd() for queries from unprivileged users, commit 815581c11cc2 ("bpf: get JITed image lengths of functions via syscall") forgot about doing so and therefore returns the #elems of the user set up buffer which is incorrect. It also needs to indicate a info.nr_jited_func_lens of zero. Fixes: 815581c11cc2 ("bpf: get JITed image lengths of functions via syscall") Signed-off-by: Daniel Borkmann <daniel@iogearbox.net> Cc: Sandipan Das <sandipan@linux.vnet.ibm.com> Cc: Song Liu <songliubraving@fb.com> Signed-off-by: Alexei Starovoitov <ast@kernel.org> --- kernel/bpf/syscall.c | 1 + 1 file changed, 1 insertion(+) diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index 9418174c276c7..cf5040fd54344 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -2078,6 +2078,7 @@ static int bpf_prog_get_info_by_fd(struct bpf_prog *prog, info.jited_prog_len = 0; info.xlated_prog_len = 0; info.nr_jited_ksyms = 0; + info.nr_jited_func_lens = 0; goto done; } From 49682bfa1e0e448a711471a5db83be0df1fb39a2 Mon Sep 17 00:00:00 2001 From: Mathieu Malaterre <malat@debian.org> Date: Wed, 31 Oct 2018 13:16:58 +0100 Subject: [PATCH 039/443] net: document skb parameter in function 'skb_gso_size_check' Remove kernel-doc warning: net/core/skbuff.c:4953: warning: Function parameter or member 'skb' not described in 'skb_gso_size_check' Signed-off-by: Mathieu Malaterre <malat@debian.org> Signed-off-by: David S. Miller <davem@davemloft.net> --- net/core/skbuff.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 946de0e24c876..b4ee5c8b928f0 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -4944,6 +4944,8 @@ static unsigned int skb_gso_mac_seglen(const struct sk_buff *skb) * * This is a helper to do that correctly considering GSO_BY_FRAGS. * + * @skb: GSO skb + * * @seg_len: The segmented length (from skb_gso_*_seglen). In the * GSO_BY_FRAGS case this will be [header sizes + GSO_BY_FRAGS]. * From 7b900ead6cc66b2ee873cb042dfba169aa68b56c Mon Sep 17 00:00:00 2001 From: Frieder Schrempf <frieder.schrempf@kontron.de> Date: Wed, 31 Oct 2018 22:52:19 +0100 Subject: [PATCH 040/443] usbnet: smsc95xx: disable carrier check while suspending We need to make sure, that the carrier check polling is disabled while suspending. Otherwise we can end up with usbnet_read_cmd() being issued when only usbnet_read_cmd_nopm() is allowed. If this happens, read operations lock up. Fixes: d69d169493 ("usbnet: smsc95xx: fix link detection for disabled autonegotiation") Signed-off-by: Frieder Schrempf <frieder.schrempf@kontron.de> Reviewed-by: Raghuram Chary J <RaghuramChary.Jallipalli@microchip.com> Signed-off-by: David S. Miller <davem@davemloft.net> --- drivers/net/usb/smsc95xx.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/net/usb/smsc95xx.c b/drivers/net/usb/smsc95xx.c index 262e7a3c23cb6..2d17f3b9bb165 100644 --- a/drivers/net/usb/smsc95xx.c +++ b/drivers/net/usb/smsc95xx.c @@ -1598,6 +1598,8 @@ static int smsc95xx_suspend(struct usb_interface *intf, pm_message_t message) return ret; } + cancel_delayed_work_sync(&pdata->carrier_check); + if (pdata->suspend_flags) { netdev_warn(dev->net, "error during last resume\n"); pdata->suspend_flags = 0; @@ -1840,6 +1842,11 @@ static int smsc95xx_suspend(struct usb_interface *intf, pm_message_t message) */ if (ret && PMSG_IS_AUTO(message)) usbnet_resume(intf); + + if (ret) + schedule_delayed_work(&pdata->carrier_check, + CARRIER_CHECK_DELAY); + return ret; } From 2384d02520ff2a916169b2fd85ea50e923ed56c2 Mon Sep 17 00:00:00 2001 From: Jeff Barnhill <0xeffeff@gmail.com> Date: Fri, 2 Nov 2018 20:23:57 +0000 Subject: [PATCH 041/443] net/ipv6: Add anycast addresses to a global hashtable MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit icmp6_send() function is expensive on systems with a large number of interfaces. Every time it’s called, it has to verify that the source address does not correspond to an existing anycast address by looping through every device and every anycast address on the device. This can result in significant delays for a CPU when there are a large number of neighbors and ND timers are frequently timing out and calling neigh_invalidate(). Add anycast addresses to a global hashtable to allow quick searching for matching anycast addresses. This is based on inet6_addr_lst in addrconf.c. Signed-off-by: Jeff Barnhill <0xeffeff@gmail.com> Signed-off-by: David S. Miller <davem@davemloft.net> --- include/net/addrconf.h | 2 ++ include/net/if_inet6.h | 2 ++ net/ipv6/af_inet6.c | 5 +++ net/ipv6/anycast.c | 80 +++++++++++++++++++++++++++++++++++++++--- 4 files changed, 85 insertions(+), 4 deletions(-) diff --git a/include/net/addrconf.h b/include/net/addrconf.h index 14b789a123e7d..1656c59784987 100644 --- a/include/net/addrconf.h +++ b/include/net/addrconf.h @@ -317,6 +317,8 @@ bool ipv6_chk_acast_addr(struct net *net, struct net_device *dev, const struct in6_addr *addr); bool ipv6_chk_acast_addr_src(struct net *net, struct net_device *dev, const struct in6_addr *addr); +int ipv6_anycast_init(void); +void ipv6_anycast_cleanup(void); /* Device notifier */ int register_inet6addr_notifier(struct notifier_block *nb); diff --git a/include/net/if_inet6.h b/include/net/if_inet6.h index d7578cf49c3af..c9c78c15bce04 100644 --- a/include/net/if_inet6.h +++ b/include/net/if_inet6.h @@ -146,10 +146,12 @@ struct ifacaddr6 { struct in6_addr aca_addr; struct fib6_info *aca_rt; struct ifacaddr6 *aca_next; + struct hlist_node aca_addr_lst; int aca_users; refcount_t aca_refcnt; unsigned long aca_cstamp; unsigned long aca_tstamp; + struct rcu_head rcu; }; #define IFA_HOST IPV6_ADDR_LOOPBACK diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c index 3f4d61017a694..f0cd291034f0f 100644 --- a/net/ipv6/af_inet6.c +++ b/net/ipv6/af_inet6.c @@ -1001,6 +1001,9 @@ static int __init inet6_init(void) err = ip6_flowlabel_init(); if (err) goto ip6_flowlabel_fail; + err = ipv6_anycast_init(); + if (err) + goto ipv6_anycast_fail; err = addrconf_init(); if (err) goto addrconf_fail; @@ -1091,6 +1094,8 @@ static int __init inet6_init(void) ipv6_exthdrs_fail: addrconf_cleanup(); addrconf_fail: + ipv6_anycast_cleanup(); +ipv6_anycast_fail: ip6_flowlabel_cleanup(); ip6_flowlabel_fail: ndisc_late_cleanup(); diff --git a/net/ipv6/anycast.c b/net/ipv6/anycast.c index 4e0ff7031edd5..7698637cf8276 100644 --- a/net/ipv6/anycast.c +++ b/net/ipv6/anycast.c @@ -44,8 +44,22 @@ #include <net/checksum.h> +#define IN6_ADDR_HSIZE_SHIFT 8 +#define IN6_ADDR_HSIZE BIT(IN6_ADDR_HSIZE_SHIFT) +/* anycast address hash table + */ +static struct hlist_head inet6_acaddr_lst[IN6_ADDR_HSIZE]; +static DEFINE_SPINLOCK(acaddr_hash_lock); + static int ipv6_dev_ac_dec(struct net_device *dev, const struct in6_addr *addr); +static u32 inet6_acaddr_hash(struct net *net, const struct in6_addr *addr) +{ + u32 val = ipv6_addr_hash(addr) ^ net_hash_mix(net); + + return hash_32(val, IN6_ADDR_HSIZE_SHIFT); +} + /* * socket join an anycast group */ @@ -204,16 +218,39 @@ void ipv6_sock_ac_close(struct sock *sk) rtnl_unlock(); } +static void ipv6_add_acaddr_hash(struct net *net, struct ifacaddr6 *aca) +{ + unsigned int hash = inet6_acaddr_hash(net, &aca->aca_addr); + + spin_lock(&acaddr_hash_lock); + hlist_add_head_rcu(&aca->aca_addr_lst, &inet6_acaddr_lst[hash]); + spin_unlock(&acaddr_hash_lock); +} + +static void ipv6_del_acaddr_hash(struct ifacaddr6 *aca) +{ + spin_lock(&acaddr_hash_lock); + hlist_del_init_rcu(&aca->aca_addr_lst); + spin_unlock(&acaddr_hash_lock); +} + static void aca_get(struct ifacaddr6 *aca) { refcount_inc(&aca->aca_refcnt); } +static void aca_free_rcu(struct rcu_head *h) +{ + struct ifacaddr6 *aca = container_of(h, struct ifacaddr6, rcu); + + fib6_info_release(aca->aca_rt); + kfree(aca); +} + static void aca_put(struct ifacaddr6 *ac) { if (refcount_dec_and_test(&ac->aca_refcnt)) { - fib6_info_release(ac->aca_rt); - kfree(ac); + call_rcu(&ac->rcu, aca_free_rcu); } } @@ -229,6 +266,7 @@ static struct ifacaddr6 *aca_alloc(struct fib6_info *f6i, aca->aca_addr = *addr; fib6_info_hold(f6i); aca->aca_rt = f6i; + INIT_HLIST_NODE(&aca->aca_addr_lst); aca->aca_users = 1; /* aca_tstamp should be updated upon changes */ aca->aca_cstamp = aca->aca_tstamp = jiffies; @@ -285,6 +323,8 @@ int __ipv6_dev_ac_inc(struct inet6_dev *idev, const struct in6_addr *addr) aca_get(aca); write_unlock_bh(&idev->lock); + ipv6_add_acaddr_hash(net, aca); + ip6_ins_rt(net, f6i); addrconf_join_solict(idev->dev, &aca->aca_addr); @@ -325,6 +365,7 @@ int __ipv6_dev_ac_dec(struct inet6_dev *idev, const struct in6_addr *addr) else idev->ac_list = aca->aca_next; write_unlock_bh(&idev->lock); + ipv6_del_acaddr_hash(aca); addrconf_leave_solict(idev, &aca->aca_addr); ip6_del_rt(dev_net(idev->dev), aca->aca_rt); @@ -352,6 +393,8 @@ void ipv6_ac_destroy_dev(struct inet6_dev *idev) idev->ac_list = aca->aca_next; write_unlock_bh(&idev->lock); + ipv6_del_acaddr_hash(aca); + addrconf_leave_solict(idev, &aca->aca_addr); ip6_del_rt(dev_net(idev->dev), aca->aca_rt); @@ -390,17 +433,25 @@ static bool ipv6_chk_acast_dev(struct net_device *dev, const struct in6_addr *ad bool ipv6_chk_acast_addr(struct net *net, struct net_device *dev, const struct in6_addr *addr) { + unsigned int hash = inet6_acaddr_hash(net, addr); + struct net_device *nh_dev; + struct ifacaddr6 *aca; bool found = false; rcu_read_lock(); if (dev) found = ipv6_chk_acast_dev(dev, addr); else - for_each_netdev_rcu(net, dev) - if (ipv6_chk_acast_dev(dev, addr)) { + hlist_for_each_entry_rcu(aca, &inet6_acaddr_lst[hash], + aca_addr_lst) { + nh_dev = fib6_info_nh_dev(aca->aca_rt); + if (!nh_dev || !net_eq(dev_net(nh_dev), net)) + continue; + if (ipv6_addr_equal(&aca->aca_addr, addr)) { found = true; break; } + } rcu_read_unlock(); return found; } @@ -539,4 +590,25 @@ void ac6_proc_exit(struct net *net) { remove_proc_entry("anycast6", net->proc_net); } + +/* Init / cleanup code + */ +int __init ipv6_anycast_init(void) +{ + int i; + + for (i = 0; i < IN6_ADDR_HSIZE; i++) + INIT_HLIST_HEAD(&inet6_acaddr_lst[i]); + return 0; +} + +void ipv6_anycast_cleanup(void) +{ + int i; + + spin_lock(&acaddr_hash_lock); + for (i = 0; i < IN6_ADDR_HSIZE; i++) + WARN_ON(!hlist_empty(&inet6_acaddr_lst[i])); + spin_unlock(&acaddr_hash_lock); +} #endif From 284fb78ed7572117846f8e1d1d8e3dbfd16880c2 Mon Sep 17 00:00:00 2001 From: Tristram Ha <Tristram.Ha@microchip.com> Date: Fri, 2 Nov 2018 19:23:41 -0700 Subject: [PATCH 042/443] net: dsa: microchip: initialize mutex before use Initialize mutex before use. Avoid kernel complaint when CONFIG_DEBUG_LOCK_ALLOC is enabled. Fixes: b987e98e50ab90e5 ("dsa: add DSA switch driver for Microchip KSZ9477") Signed-off-by: Tristram Ha <Tristram.Ha@microchip.com> Reviewed-by: Pavel Machek <pavel@ucw.cz> Reviewed-by: Andrew Lunn <andrew@lunn.ch> Reviewed-by: Florian Fainelli <f.fainelli@gmail.com> Signed-off-by: David S. Miller <davem@davemloft.net> --- drivers/net/dsa/microchip/ksz_common.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/net/dsa/microchip/ksz_common.c b/drivers/net/dsa/microchip/ksz_common.c index 54e0ca6ed7308..86b6464b4525c 100644 --- a/drivers/net/dsa/microchip/ksz_common.c +++ b/drivers/net/dsa/microchip/ksz_common.c @@ -1117,11 +1117,6 @@ static int ksz_switch_init(struct ksz_device *dev) { int i; - mutex_init(&dev->reg_mutex); - mutex_init(&dev->stats_mutex); - mutex_init(&dev->alu_mutex); - mutex_init(&dev->vlan_mutex); - dev->ds->ops = &ksz_switch_ops; for (i = 0; i < ARRAY_SIZE(ksz_switch_chips); i++) { @@ -1206,6 +1201,11 @@ int ksz_switch_register(struct ksz_device *dev) if (dev->pdata) dev->chip_id = dev->pdata->chip_id; + mutex_init(&dev->reg_mutex); + mutex_init(&dev->stats_mutex); + mutex_init(&dev->alu_mutex); + mutex_init(&dev->vlan_mutex); + if (ksz_switch_detect(dev)) return -EINVAL; From c7e86acfcee30794dc99a0759924bf7b9d43f1ca Mon Sep 17 00:00:00 2001 From: David Howells <dhowells@redhat.com> Date: Thu, 1 Nov 2018 13:39:53 +0000 Subject: [PATCH 043/443] rxrpc: Fix lockup due to no error backoff after ack transmit error If the network becomes (partially) unavailable, say by disabling IPv6, the background ACK transmission routine can get itself into a tizzy by proposing immediate ACK retransmission. Since we're in the call event processor, that happens immediately without returning to the workqueue manager. The condition should clear after a while when either the network comes back or the call times out. Fix this by: (1) When re-proposing an ACK on failed Tx, don't schedule it immediately. This will allow a certain amount of time to elapse before we try again. (2) Enforce a return to the workqueue manager after a certain number of iterations of the call processing loop. (3) Add a backoff delay that increases the delay on deferred ACKs by a jiffy per failed transmission to a limit of HZ. The backoff delay is cleared on a successful return from kernel_sendmsg(). (4) Cancel calls immediately if the opening sendmsg fails. The layer above can arrange retransmission or rotate to another server. Fixes: 248f219cb8bc ("rxrpc: Rewrite the data and ack handling code") Signed-off-by: David Howells <dhowells@redhat.com> Signed-off-by: David S. Miller <davem@davemloft.net> --- net/rxrpc/ar-internal.h | 1 + net/rxrpc/call_event.c | 18 ++++++++++++++---- net/rxrpc/output.c | 35 +++++++++++++++++++++++++++++++---- 3 files changed, 46 insertions(+), 8 deletions(-) diff --git a/net/rxrpc/ar-internal.h b/net/rxrpc/ar-internal.h index 382196e57a26c..bc628acf4f4ff 100644 --- a/net/rxrpc/ar-internal.h +++ b/net/rxrpc/ar-internal.h @@ -611,6 +611,7 @@ struct rxrpc_call { * not hard-ACK'd packet follows this. */ rxrpc_seq_t tx_top; /* Highest Tx slot allocated. */ + u16 tx_backoff; /* Delay to insert due to Tx failure */ /* TCP-style slow-start congestion control [RFC5681]. Since the SMSS * is fixed, we keep these numbers in terms of segments (ie. DATA diff --git a/net/rxrpc/call_event.c b/net/rxrpc/call_event.c index 8e7434e92097e..468efc3660c03 100644 --- a/net/rxrpc/call_event.c +++ b/net/rxrpc/call_event.c @@ -123,6 +123,7 @@ static void __rxrpc_propose_ACK(struct rxrpc_call *call, u8 ack_reason, else ack_at = expiry; + ack_at += READ_ONCE(call->tx_backoff); ack_at += now; if (time_before(ack_at, call->ack_at)) { WRITE_ONCE(call->ack_at, ack_at); @@ -311,6 +312,7 @@ void rxrpc_process_call(struct work_struct *work) container_of(work, struct rxrpc_call, processor); rxrpc_serial_t *send_ack; unsigned long now, next, t; + unsigned int iterations = 0; rxrpc_see_call(call); @@ -319,6 +321,11 @@ void rxrpc_process_call(struct work_struct *work) call->debug_id, rxrpc_call_states[call->state], call->events); recheck_state: + /* Limit the number of times we do this before returning to the manager */ + iterations++; + if (iterations > 5) + goto requeue; + if (test_and_clear_bit(RXRPC_CALL_EV_ABORT, &call->events)) { rxrpc_send_abort_packet(call); goto recheck_state; @@ -447,13 +454,16 @@ void rxrpc_process_call(struct work_struct *work) rxrpc_reduce_call_timer(call, next, now, rxrpc_timer_restart); /* other events may have been raised since we started checking */ - if (call->events && call->state < RXRPC_CALL_COMPLETE) { - __rxrpc_queue_call(call); - goto out; - } + if (call->events && call->state < RXRPC_CALL_COMPLETE) + goto requeue; out_put: rxrpc_put_call(call, rxrpc_call_put); out: _leave(""); + return; + +requeue: + __rxrpc_queue_call(call); + goto out; } diff --git a/net/rxrpc/output.c b/net/rxrpc/output.c index 1894188888391..736aa92811004 100644 --- a/net/rxrpc/output.c +++ b/net/rxrpc/output.c @@ -34,6 +34,21 @@ struct rxrpc_abort_buffer { static const char rxrpc_keepalive_string[] = ""; +/* + * Increase Tx backoff on transmission failure and clear it on success. + */ +static void rxrpc_tx_backoff(struct rxrpc_call *call, int ret) +{ + if (ret < 0) { + u16 tx_backoff = READ_ONCE(call->tx_backoff); + + if (tx_backoff < HZ) + WRITE_ONCE(call->tx_backoff, tx_backoff + 1); + } else { + WRITE_ONCE(call->tx_backoff, 0); + } +} + /* * Arrange for a keepalive ping a certain time after we last transmitted. This * lets the far side know we're still interested in this call and helps keep @@ -210,6 +225,7 @@ int rxrpc_send_ack_packet(struct rxrpc_call *call, bool ping, else trace_rxrpc_tx_packet(call->debug_id, &pkt->whdr, rxrpc_tx_point_call_ack); + rxrpc_tx_backoff(call, ret); if (call->state < RXRPC_CALL_COMPLETE) { if (ret < 0) { @@ -218,7 +234,7 @@ int rxrpc_send_ack_packet(struct rxrpc_call *call, bool ping, rxrpc_propose_ACK(call, pkt->ack.reason, ntohs(pkt->ack.maxSkew), ntohl(pkt->ack.serial), - true, true, + false, true, rxrpc_propose_ack_retry_tx); } else { spin_lock_bh(&call->lock); @@ -300,7 +316,7 @@ int rxrpc_send_abort_packet(struct rxrpc_call *call) else trace_rxrpc_tx_packet(call->debug_id, &pkt.whdr, rxrpc_tx_point_call_abort); - + rxrpc_tx_backoff(call, ret); rxrpc_put_connection(conn); return ret; @@ -413,6 +429,7 @@ int rxrpc_send_data_packet(struct rxrpc_call *call, struct sk_buff *skb, else trace_rxrpc_tx_packet(call->debug_id, &whdr, rxrpc_tx_point_call_data_nofrag); + rxrpc_tx_backoff(call, ret); if (ret == -EMSGSIZE) goto send_fragmentable; @@ -445,9 +462,18 @@ int rxrpc_send_data_packet(struct rxrpc_call *call, struct sk_buff *skb, rxrpc_reduce_call_timer(call, expect_rx_by, nowj, rxrpc_timer_set_for_normal); } - } - rxrpc_set_keepalive(call); + rxrpc_set_keepalive(call); + } else { + /* Cancel the call if the initial transmission fails, + * particularly if that's due to network routing issues that + * aren't going away anytime soon. The layer above can arrange + * the retransmission. + */ + if (!test_and_set_bit(RXRPC_CALL_BEGAN_RX_TIMER, &call->flags)) + rxrpc_set_call_completion(call, RXRPC_CALL_LOCAL_ERROR, + RX_USER_ABORT, ret); + } _leave(" = %d [%u]", ret, call->peer->maxdata); return ret; @@ -506,6 +532,7 @@ int rxrpc_send_data_packet(struct rxrpc_call *call, struct sk_buff *skb, else trace_rxrpc_tx_packet(call->debug_id, &whdr, rxrpc_tx_point_call_data_frag); + rxrpc_tx_backoff(call, ret); up_write(&conn->params.local->defrag_sem); goto done; From 09e805d2570a3a94f13dd9c9ad2bcab23da76e09 Mon Sep 17 00:00:00 2001 From: Doug Berger <opendmb@gmail.com> Date: Thu, 1 Nov 2018 15:55:37 -0700 Subject: [PATCH 044/443] net: bcmgenet: protect stop from timeout A timing hazard exists when the network interface is stopped that allows a watchdog timeout to be processed by a separate core in parallel. This creates the potential for the timeout handler to wake the queues while the driver is shutting down, or access registers after their clocks have been removed. The more common case is that the watchdog timeout will produce a warning message which doesn't lead to a crash. The chances of this are greatly increased by the fact that bcmgenet_netif_stop stops the transmit queues which can easily precipitate a watchdog time- out because of stale trans_start data in the queues. This commit corrects the behavior by ensuring that the watchdog timeout is disabled before enterring bcmgenet_netif_stop. There are currently only two users of the bcmgenet_netif_stop function: close and suspend. The close case already handles the issue by exiting the RUNNING state before invoking the driver close service. The suspend case now performs the netif_device_detach to exit the PRESENT state before the call to bcmgenet_netif_stop rather than after it. These behaviors prevent any future scheduling of the driver timeout service during the window. The netif_tx_stop_all_queues function in bcmgenet_netif_stop is replaced with netif_tx_disable to ensure synchronization with any transmit or timeout threads that may already be executing on other cores. For symmetry, the netif_device_attach call upon resume is moved to after the call to bcmgenet_netif_start. Since it wakes the transmit queues it is not necessary to invoke netif_tx_start_all_queues from bcmgenet_netif_start so it is moved into the driver open service. Fixes: 1c1008c793fa ("net: bcmgenet: add main driver file") Signed-off-by: Doug Berger <opendmb@gmail.com> Signed-off-by: Florian Fainelli <f.fainelli@gmail.com> Signed-off-by: David S. Miller <davem@davemloft.net> --- drivers/net/ethernet/broadcom/genet/bcmgenet.c | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/drivers/net/ethernet/broadcom/genet/bcmgenet.c b/drivers/net/ethernet/broadcom/genet/bcmgenet.c index 20c1681bb1afe..2d6f090bf6440 100644 --- a/drivers/net/ethernet/broadcom/genet/bcmgenet.c +++ b/drivers/net/ethernet/broadcom/genet/bcmgenet.c @@ -2855,7 +2855,6 @@ static void bcmgenet_netif_start(struct net_device *dev) umac_enable_set(priv, CMD_TX_EN | CMD_RX_EN, true); - netif_tx_start_all_queues(dev); bcmgenet_enable_tx_napi(priv); /* Monitor link interrupts now */ @@ -2937,6 +2936,8 @@ static int bcmgenet_open(struct net_device *dev) bcmgenet_netif_start(dev); + netif_tx_start_all_queues(dev); + return 0; err_irq1: @@ -2958,7 +2959,7 @@ static void bcmgenet_netif_stop(struct net_device *dev) struct bcmgenet_priv *priv = netdev_priv(dev); bcmgenet_disable_tx_napi(priv); - netif_tx_stop_all_queues(dev); + netif_tx_disable(dev); /* Disable MAC receive */ umac_enable_set(priv, CMD_RX_EN, false); @@ -3620,13 +3621,13 @@ static int bcmgenet_suspend(struct device *d) if (!netif_running(dev)) return 0; + netif_device_detach(dev); + bcmgenet_netif_stop(dev); if (!device_may_wakeup(d)) phy_suspend(dev->phydev); - netif_device_detach(dev); - /* Prepare the device for Wake-on-LAN and switch to the slow clock */ if (device_may_wakeup(d) && priv->wolopts) { ret = bcmgenet_power_down(priv, GENET_POWER_WOL_MAGIC); @@ -3700,8 +3701,6 @@ static int bcmgenet_resume(struct device *d) /* Always enable ring 16 - descriptor ring */ bcmgenet_enable_dma(priv, dma_ctrl); - netif_device_attach(dev); - if (!device_may_wakeup(d)) phy_resume(dev->phydev); @@ -3710,6 +3709,8 @@ static int bcmgenet_resume(struct device *d) bcmgenet_netif_start(dev); + netif_device_attach(dev); + return 0; out_clk_disable: From 7cb6a2a2c72c1ed8f42fb01f1a661281b568dead Mon Sep 17 00:00:00 2001 From: Florian Fainelli <f.fainelli@gmail.com> Date: Thu, 1 Nov 2018 15:55:38 -0700 Subject: [PATCH 045/443] net: systemport: Protect stop from timeout A timing hazard exists when the network interface is stopped that allows a watchdog timeout to be processed by a separate core in parallel. This creates the potential for the timeout handler to wake the queues while the driver is shutting down, or access registers after their clocks have been removed. The more common case is that the watchdog timeout will produce a warning message which doesn't lead to a crash. The chances of this are greatly increased by the fact that bcm_sysport_netif_stop stops the transmit queues which can easily precipitate a watchdog time- out because of stale trans_start data in the queues. This commit corrects the behavior by ensuring that the watchdog timeout is disabled before enterring bcm_sysport_netif_stop. There are currently only two users of the bcm_sysport_netif_stop function: close and suspend. The close case already handles the issue by exiting the RUNNING state before invoking the driver close service. The suspend case now performs the netif_device_detach to exit the PRESENT state before the call to bcm_sysport_netif_stop rather than after it. These behaviors prevent any future scheduling of the driver timeout service during the window. The netif_tx_stop_all_queues function in bcm_sysport_netif_stop is replaced with netif_tx_disable to ensure synchronization with any transmit or timeout threads that may already be executing on other cores. For symmetry, the netif_device_attach call upon resume is moved to after the call to bcm_sysport_netif_start. Since it wakes the transmit queues it is not necessary to invoke netif_tx_start_all_queues from bcm_sysport_netif_start so it is moved into the driver open service. Fixes: 40755a0fce17 ("net: systemport: add suspend and resume support") Fixes: 80105befdb4b ("net: systemport: add Broadcom SYSTEMPORT Ethernet MAC driver") Signed-off-by: Florian Fainelli <f.fainelli@gmail.com> Signed-off-by: David S. Miller <davem@davemloft.net> --- drivers/net/ethernet/broadcom/bcmsysport.c | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bcmsysport.c b/drivers/net/ethernet/broadcom/bcmsysport.c index 4122553e224b2..0e2d99c737e35 100644 --- a/drivers/net/ethernet/broadcom/bcmsysport.c +++ b/drivers/net/ethernet/broadcom/bcmsysport.c @@ -1902,9 +1902,6 @@ static void bcm_sysport_netif_start(struct net_device *dev) intrl2_1_mask_clear(priv, 0xffffffff); else intrl2_0_mask_clear(priv, INTRL2_0_TDMA_MBDONE_MASK); - - /* Last call before we start the real business */ - netif_tx_start_all_queues(dev); } static void rbuf_init(struct bcm_sysport_priv *priv) @@ -2048,6 +2045,8 @@ static int bcm_sysport_open(struct net_device *dev) bcm_sysport_netif_start(dev); + netif_tx_start_all_queues(dev); + return 0; out_clear_rx_int: @@ -2071,7 +2070,7 @@ static void bcm_sysport_netif_stop(struct net_device *dev) struct bcm_sysport_priv *priv = netdev_priv(dev); /* stop all software from updating hardware */ - netif_tx_stop_all_queues(dev); + netif_tx_disable(dev); napi_disable(&priv->napi); cancel_work_sync(&priv->dim.dim.work); phy_stop(dev->phydev); @@ -2658,12 +2657,12 @@ static int __maybe_unused bcm_sysport_suspend(struct device *d) if (!netif_running(dev)) return 0; + netif_device_detach(dev); + bcm_sysport_netif_stop(dev); phy_suspend(dev->phydev); - netif_device_detach(dev); - /* Disable UniMAC RX */ umac_enable_set(priv, CMD_RX_EN, 0); @@ -2746,8 +2745,6 @@ static int __maybe_unused bcm_sysport_resume(struct device *d) goto out_free_rx_ring; } - netif_device_attach(dev); - /* RX pipe enable */ topctrl_writel(priv, 0, RX_FLUSH_CNTL); @@ -2788,6 +2785,8 @@ static int __maybe_unused bcm_sysport_resume(struct device *d) bcm_sysport_netif_start(dev); + netif_device_attach(dev); + return 0; out_free_rx_ring: From cd26ea6d50a207ee37e0364ecc2d196d6c9671e8 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo <acme@redhat.com> Date: Sat, 3 Nov 2018 08:19:56 -0300 Subject: [PATCH 046/443] perf trace: Fix setting of augmented payload when using eBPF + raw_syscalls For now with BPF raw_augmented we hook into raw_syscalls:sys_enter and there we get all 6 syscall args plus the tracepoint common fields (sizeof(long)) and the syscall_nr (another long). So we check if that is the case and if so don't look after the sc->args_size, but always after the full raw_syscalls:sys_enter payload, which is fixed. We'll revisit this later to pass s->args_size to the BPF augmenter (now tools/perf/examples/bpf/augmented_raw_syscalls.c, so that it copies only what we need for each syscall, like what happens when we use syscalls:sys_enter_NAME, so that we reduce the kernel/userspace traffic to just what is needed for each syscall. Cc: Adrian Hunter <adrian.hunter@intel.com> Cc: David Ahern <dsahern@gmail.com> Cc: Jiri Olsa <jolsa@kernel.org> Cc: Namhyung Kim <namhyung@kernel.org> Cc: Wang Nan <wangnan0@huawei.com> Link: https://lkml.kernel.org/n/tip-nlslrg8apxdsobt4pwl3n7ur@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com> --- tools/perf/builtin-trace.c | 37 ++++++++++++++++++++++++++++++------- 1 file changed, 30 insertions(+), 7 deletions(-) diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c index f582ca575883e..835619476370c 100644 --- a/tools/perf/builtin-trace.c +++ b/tools/perf/builtin-trace.c @@ -108,6 +108,7 @@ struct trace { } stats; unsigned int max_stack; unsigned int min_stack; + bool raw_augmented_syscalls; bool not_ev_qualifier; bool live; bool full_time; @@ -1724,13 +1725,28 @@ static int trace__fprintf_sample(struct trace *trace, struct perf_evsel *evsel, return printed; } -static void *syscall__augmented_args(struct syscall *sc, struct perf_sample *sample, int *augmented_args_size) +static void *syscall__augmented_args(struct syscall *sc, struct perf_sample *sample, int *augmented_args_size, bool raw_augmented) { void *augmented_args = NULL; + /* + * For now with BPF raw_augmented we hook into raw_syscalls:sys_enter + * and there we get all 6 syscall args plus the tracepoint common + * fields (sizeof(long)) and the syscall_nr (another long). So we check + * if that is the case and if so don't look after the sc->args_size, + * but always after the full raw_syscalls:sys_enter payload, which is + * fixed. + * + * We'll revisit this later to pass s->args_size to the BPF augmenter + * (now tools/perf/examples/bpf/augmented_raw_syscalls.c, so that it + * copies only what we need for each syscall, like what happens when we + * use syscalls:sys_enter_NAME, so that we reduce the kernel/userspace + * traffic to just what is needed for each syscall. + */ + int args_size = raw_augmented ? (8 * (int)sizeof(long)) : sc->args_size; - *augmented_args_size = sample->raw_size - sc->args_size; + *augmented_args_size = sample->raw_size - args_size; if (*augmented_args_size > 0) - augmented_args = sample->raw_data + sc->args_size; + augmented_args = sample->raw_data + args_size; return augmented_args; } @@ -1780,7 +1796,7 @@ static int trace__sys_enter(struct trace *trace, struct perf_evsel *evsel, * here and avoid using augmented syscalls when the evsel is the raw_syscalls one. */ if (evsel != trace->syscalls.events.sys_enter) - augmented_args = syscall__augmented_args(sc, sample, &augmented_args_size); + augmented_args = syscall__augmented_args(sc, sample, &augmented_args_size, trace->raw_augmented_syscalls); ttrace->entry_time = sample->time; msg = ttrace->entry_str; printed += scnprintf(msg + printed, trace__entry_str_size - printed, "%s(", sc->name); @@ -1833,7 +1849,7 @@ static int trace__fprintf_sys_enter(struct trace *trace, struct perf_evsel *evse goto out_put; args = perf_evsel__sc_tp_ptr(evsel, args, sample); - augmented_args = syscall__augmented_args(sc, sample, &augmented_args_size); + augmented_args = syscall__augmented_args(sc, sample, &augmented_args_size, trace->raw_augmented_syscalls); syscall__scnprintf_args(sc, msg, sizeof(msg), args, augmented_args, augmented_args_size, trace, thread); fprintf(trace->output, "%s", msg); err = 0; @@ -3501,8 +3517,15 @@ int cmd_trace(int argc, const char **argv) evsel->handler = trace__sys_enter; evlist__for_each_entry(trace.evlist, evsel) { - if (strstarts(perf_evsel__name(evsel), "syscalls:sys_exit_") || - strcmp(perf_evsel__name(evsel), "raw_syscalls:sys_exit") == 0) { + bool raw_syscalls_sys_exit = strcmp(perf_evsel__name(evsel), "raw_syscalls:sys_exit") == 0; + + if (raw_syscalls_sys_exit) { + trace.raw_augmented_syscalls = true; + goto init_augmented_syscall_tp; + } + + if (strstarts(perf_evsel__name(evsel), "syscalls:sys_exit_")) { +init_augmented_syscall_tp: perf_evsel__init_augmented_syscall_tp(evsel); perf_evsel__init_augmented_syscall_tp_ret(evsel); evsel->handler = trace__sys_exit; From 17b8b74c0f8dbf9b9e3301f9ca5b65dd1c079951 Mon Sep 17 00:00:00 2001 From: Jozsef Kadlecsik <kadlec@blackhole.kfki.hu> Date: Fri, 19 Oct 2018 19:35:19 +0200 Subject: [PATCH 047/443] netfilter: ipset: Correct rcu_dereference() call in ip_set_put_comment() The function is called when rcu_read_lock() is held and not when rcu_read_lock_bh() is held. Signed-off-by: Jozsef Kadlecsik <kadlec@blackhole.kfki.hu> Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org> --- include/linux/netfilter/ipset/ip_set_comment.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/include/linux/netfilter/ipset/ip_set_comment.h b/include/linux/netfilter/ipset/ip_set_comment.h index 8e2bab1e8e909..70877f8de7e91 100644 --- a/include/linux/netfilter/ipset/ip_set_comment.h +++ b/include/linux/netfilter/ipset/ip_set_comment.h @@ -43,11 +43,11 @@ ip_set_init_comment(struct ip_set *set, struct ip_set_comment *comment, rcu_assign_pointer(comment->c, c); } -/* Used only when dumping a set, protected by rcu_read_lock_bh() */ +/* Used only when dumping a set, protected by rcu_read_lock() */ static inline int ip_set_put_comment(struct sk_buff *skb, const struct ip_set_comment *comment) { - struct ip_set_comment_rcu *c = rcu_dereference_bh(comment->c); + struct ip_set_comment_rcu *c = rcu_dereference(comment->c); if (!c) return 0; From 54451f60c8fa061af9051a53be9786393947367c Mon Sep 17 00:00:00 2001 From: Taehee Yoo <ap420073@gmail.com> Date: Sun, 21 Oct 2018 00:00:08 +0900 Subject: [PATCH 048/443] netfilter: xt_IDLETIMER: add sysfs filename checking routine When IDLETIMER rule is added, sysfs file is created under /sys/class/xt_idletimer/timers/ But some label name shouldn't be used. ".", "..", "power", "uevent", "subsystem", etc... So that sysfs filename checking routine is needed. test commands: %iptables -I INPUT -j IDLETIMER --timeout 1 --label "power" splat looks like: [95765.423132] sysfs: cannot create duplicate filename '/devices/virtual/xt_idletimer/timers/power' [95765.433418] CPU: 0 PID: 8446 Comm: iptables Not tainted 4.19.0-rc6+ #20 [95765.449755] Call Trace: [95765.449755] dump_stack+0xc9/0x16b [95765.449755] ? show_regs_print_info+0x5/0x5 [95765.449755] sysfs_warn_dup+0x74/0x90 [95765.449755] sysfs_add_file_mode_ns+0x352/0x500 [95765.449755] sysfs_create_file_ns+0x179/0x270 [95765.449755] ? sysfs_add_file_mode_ns+0x500/0x500 [95765.449755] ? idletimer_tg_checkentry+0x3e5/0xb1b [xt_IDLETIMER] [95765.449755] ? rcu_read_lock_sched_held+0x114/0x130 [95765.449755] ? __kmalloc_track_caller+0x211/0x2b0 [95765.449755] ? memcpy+0x34/0x50 [95765.449755] idletimer_tg_checkentry+0x4e2/0xb1b [xt_IDLETIMER] [ ... ] Fixes: 0902b469bd25 ("netfilter: xtables: idletimer target implementation") Signed-off-by: Taehee Yoo <ap420073@gmail.com> Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org> --- net/netfilter/xt_IDLETIMER.c | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/net/netfilter/xt_IDLETIMER.c b/net/netfilter/xt_IDLETIMER.c index c6acfc2d9c841..eb4cbd244c3d3 100644 --- a/net/netfilter/xt_IDLETIMER.c +++ b/net/netfilter/xt_IDLETIMER.c @@ -114,6 +114,22 @@ static void idletimer_tg_expired(struct timer_list *t) schedule_work(&timer->work); } +static int idletimer_check_sysfs_name(const char *name, unsigned int size) +{ + int ret; + + ret = xt_check_proc_name(name, size); + if (ret < 0) + return ret; + + if (!strcmp(name, "power") || + !strcmp(name, "subsystem") || + !strcmp(name, "uevent")) + return -EINVAL; + + return 0; +} + static int idletimer_tg_create(struct idletimer_tg_info *info) { int ret; @@ -124,6 +140,10 @@ static int idletimer_tg_create(struct idletimer_tg_info *info) goto out; } + ret = idletimer_check_sysfs_name(info->label, sizeof(info->label)); + if (ret < 0) + goto out_free_timer; + sysfs_attr_init(&info->timer->attr.attr); info->timer->attr.attr.name = kstrdup(info->label, GFP_KERNEL); if (!info->timer->attr.attr.name) { From 8a02bdd50b2ecb6d62121d2958d3ea186cc88ce7 Mon Sep 17 00:00:00 2001 From: Jozsef Kadlecsik <kadlec@blackhole.kfki.hu> Date: Tue, 30 Oct 2018 22:43:42 +0100 Subject: [PATCH 049/443] netfilter: ipset: Fix calling ip_set() macro at dumping The ip_set() macro is called when either ip_set_ref_lock held only or no lock/nfnl mutex is held at dumping. Take this into account properly. Also, use Pablo's suggestion to use rcu_dereference_raw(), the ref_netlink protects the set. Signed-off-by: Jozsef Kadlecsik <kadlec@blackhole.kfki.hu> Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org> --- net/netfilter/ipset/ip_set_core.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/net/netfilter/ipset/ip_set_core.c b/net/netfilter/ipset/ip_set_core.c index 68db946df1511..1577f2f76060d 100644 --- a/net/netfilter/ipset/ip_set_core.c +++ b/net/netfilter/ipset/ip_set_core.c @@ -55,11 +55,15 @@ MODULE_AUTHOR("Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>"); MODULE_DESCRIPTION("core IP set support"); MODULE_ALIAS_NFNL_SUBSYS(NFNL_SUBSYS_IPSET); -/* When the nfnl mutex is held: */ +/* When the nfnl mutex or ip_set_ref_lock is held: */ #define ip_set_dereference(p) \ - rcu_dereference_protected(p, lockdep_nfnl_is_held(NFNL_SUBSYS_IPSET)) + rcu_dereference_protected(p, \ + lockdep_nfnl_is_held(NFNL_SUBSYS_IPSET) || \ + lockdep_is_held(&ip_set_ref_lock)) #define ip_set(inst, id) \ ip_set_dereference((inst)->ip_set_list)[id] +#define ip_set_ref_netlink(inst,id) \ + rcu_dereference_raw((inst)->ip_set_list)[id] /* The set types are implemented in modules and registered set types * can be found in ip_set_type_list. Adding/deleting types is @@ -1251,7 +1255,7 @@ ip_set_dump_done(struct netlink_callback *cb) struct ip_set_net *inst = (struct ip_set_net *)cb->args[IPSET_CB_NET]; ip_set_id_t index = (ip_set_id_t)cb->args[IPSET_CB_INDEX]; - struct ip_set *set = ip_set(inst, index); + struct ip_set *set = ip_set_ref_netlink(inst, index); if (set->variant->uref) set->variant->uref(set, cb, false); @@ -1440,7 +1444,7 @@ ip_set_dump_start(struct sk_buff *skb, struct netlink_callback *cb) release_refcount: /* If there was an error or set is done, release set */ if (ret || !cb->args[IPSET_CB_ARG0]) { - set = ip_set(inst, index); + set = ip_set_ref_netlink(inst, index); if (set->variant->uref) set->variant->uref(set, cb, false); pr_debug("release set %s\n", set->name); From a95a7774d51e13f9cf4b7285666829b68852f07a Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso <pablo@netfilter.org> Date: Fri, 2 Nov 2018 00:11:34 +0100 Subject: [PATCH 050/443] netfilter: conntrack: add nf_{tcp,udp,sctp,icmp,dccp,icmpv6,generic}_pernet() Expose these functions to access conntrack protocol tracker netns area, nfnetlink_cttimeout needs this. Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org> --- include/net/netfilter/nf_conntrack_l4proto.h | 39 ++++++++++++++++++++ net/netfilter/nf_conntrack_proto_dccp.c | 13 ++----- net/netfilter/nf_conntrack_proto_generic.c | 11 ++---- net/netfilter/nf_conntrack_proto_icmp.c | 11 ++---- net/netfilter/nf_conntrack_proto_icmpv6.c | 11 ++---- net/netfilter/nf_conntrack_proto_sctp.c | 11 ++---- net/netfilter/nf_conntrack_proto_tcp.c | 15 +++----- net/netfilter/nf_conntrack_proto_udp.c | 11 ++---- 8 files changed, 63 insertions(+), 59 deletions(-) diff --git a/include/net/netfilter/nf_conntrack_l4proto.h b/include/net/netfilter/nf_conntrack_l4proto.h index eed04af9b75e5..ae7b86f587f2c 100644 --- a/include/net/netfilter/nf_conntrack_l4proto.h +++ b/include/net/netfilter/nf_conntrack_l4proto.h @@ -153,4 +153,43 @@ void nf_ct_l4proto_log_invalid(const struct sk_buff *skb, const char *fmt, ...) { } #endif /* CONFIG_SYSCTL */ +static inline struct nf_generic_net *nf_generic_pernet(struct net *net) +{ + return &net->ct.nf_ct_proto.generic; +} + +static inline struct nf_tcp_net *nf_tcp_pernet(struct net *net) +{ + return &net->ct.nf_ct_proto.tcp; +} + +static inline struct nf_udp_net *nf_udp_pernet(struct net *net) +{ + return &net->ct.nf_ct_proto.udp; +} + +static inline struct nf_icmp_net *nf_icmp_pernet(struct net *net) +{ + return &net->ct.nf_ct_proto.icmp; +} + +static inline struct nf_icmp_net *nf_icmpv6_pernet(struct net *net) +{ + return &net->ct.nf_ct_proto.icmpv6; +} + +#ifdef CONFIG_NF_CT_PROTO_DCCP +static inline struct nf_dccp_net *nf_dccp_pernet(struct net *net) +{ + return &net->ct.nf_ct_proto.dccp; +} +#endif + +#ifdef CONFIG_NF_CT_PROTO_SCTP +static inline struct nf_sctp_net *nf_sctp_pernet(struct net *net) +{ + return &net->ct.nf_ct_proto.sctp; +} +#endif + #endif /*_NF_CONNTRACK_PROTOCOL_H*/ diff --git a/net/netfilter/nf_conntrack_proto_dccp.c b/net/netfilter/nf_conntrack_proto_dccp.c index 171e9e122e5f1..023c1445bc396 100644 --- a/net/netfilter/nf_conntrack_proto_dccp.c +++ b/net/netfilter/nf_conntrack_proto_dccp.c @@ -384,11 +384,6 @@ dccp_state_table[CT_DCCP_ROLE_MAX + 1][DCCP_PKT_SYNCACK + 1][CT_DCCP_MAX + 1] = }, }; -static inline struct nf_dccp_net *dccp_pernet(struct net *net) -{ - return &net->ct.nf_ct_proto.dccp; -} - static noinline bool dccp_new(struct nf_conn *ct, const struct sk_buff *skb, const struct dccp_hdr *dh) @@ -401,7 +396,7 @@ dccp_new(struct nf_conn *ct, const struct sk_buff *skb, state = dccp_state_table[CT_DCCP_ROLE_CLIENT][dh->dccph_type][CT_DCCP_NONE]; switch (state) { default: - dn = dccp_pernet(net); + dn = nf_dccp_pernet(net); if (dn->dccp_loose == 0) { msg = "not picking up existing connection "; goto out_invalid; @@ -568,7 +563,7 @@ static int dccp_packet(struct nf_conn *ct, struct sk_buff *skb, timeouts = nf_ct_timeout_lookup(ct); if (!timeouts) - timeouts = dccp_pernet(nf_ct_net(ct))->dccp_timeout; + timeouts = nf_dccp_pernet(nf_ct_net(ct))->dccp_timeout; nf_ct_refresh_acct(ct, ctinfo, skb, timeouts[new_state]); return NF_ACCEPT; @@ -681,7 +676,7 @@ static int nlattr_to_dccp(struct nlattr *cda[], struct nf_conn *ct) static int dccp_timeout_nlattr_to_obj(struct nlattr *tb[], struct net *net, void *data) { - struct nf_dccp_net *dn = dccp_pernet(net); + struct nf_dccp_net *dn = nf_dccp_pernet(net); unsigned int *timeouts = data; int i; @@ -814,7 +809,7 @@ static int dccp_kmemdup_sysctl_table(struct net *net, struct nf_proto_net *pn, static int dccp_init_net(struct net *net) { - struct nf_dccp_net *dn = dccp_pernet(net); + struct nf_dccp_net *dn = nf_dccp_pernet(net); struct nf_proto_net *pn = &dn->pn; if (!pn->users) { diff --git a/net/netfilter/nf_conntrack_proto_generic.c b/net/netfilter/nf_conntrack_proto_generic.c index e10e867e0b55f..5da19d5fbc767 100644 --- a/net/netfilter/nf_conntrack_proto_generic.c +++ b/net/netfilter/nf_conntrack_proto_generic.c @@ -27,11 +27,6 @@ static bool nf_generic_should_process(u8 proto) } } -static inline struct nf_generic_net *generic_pernet(struct net *net) -{ - return &net->ct.nf_ct_proto.generic; -} - static bool generic_pkt_to_tuple(const struct sk_buff *skb, unsigned int dataoff, struct net *net, struct nf_conntrack_tuple *tuple) @@ -58,7 +53,7 @@ static int generic_packet(struct nf_conn *ct, } if (!timeout) - timeout = &generic_pernet(nf_ct_net(ct))->timeout; + timeout = &nf_generic_pernet(nf_ct_net(ct))->timeout; nf_ct_refresh_acct(ct, ctinfo, skb, *timeout); return NF_ACCEPT; @@ -72,7 +67,7 @@ static int generic_packet(struct nf_conn *ct, static int generic_timeout_nlattr_to_obj(struct nlattr *tb[], struct net *net, void *data) { - struct nf_generic_net *gn = generic_pernet(net); + struct nf_generic_net *gn = nf_generic_pernet(net); unsigned int *timeout = data; if (!timeout) @@ -138,7 +133,7 @@ static int generic_kmemdup_sysctl_table(struct nf_proto_net *pn, static int generic_init_net(struct net *net) { - struct nf_generic_net *gn = generic_pernet(net); + struct nf_generic_net *gn = nf_generic_pernet(net); struct nf_proto_net *pn = &gn->pn; gn->timeout = nf_ct_generic_timeout; diff --git a/net/netfilter/nf_conntrack_proto_icmp.c b/net/netfilter/nf_conntrack_proto_icmp.c index 3598520bd19b7..de64d8a5fdfd1 100644 --- a/net/netfilter/nf_conntrack_proto_icmp.c +++ b/net/netfilter/nf_conntrack_proto_icmp.c @@ -25,11 +25,6 @@ static const unsigned int nf_ct_icmp_timeout = 30*HZ; -static inline struct nf_icmp_net *icmp_pernet(struct net *net) -{ - return &net->ct.nf_ct_proto.icmp; -} - static bool icmp_pkt_to_tuple(const struct sk_buff *skb, unsigned int dataoff, struct net *net, struct nf_conntrack_tuple *tuple) { @@ -103,7 +98,7 @@ static int icmp_packet(struct nf_conn *ct, } if (!timeout) - timeout = &icmp_pernet(nf_ct_net(ct))->timeout; + timeout = &nf_icmp_pernet(nf_ct_net(ct))->timeout; nf_ct_refresh_acct(ct, ctinfo, skb, *timeout); return NF_ACCEPT; @@ -275,7 +270,7 @@ static int icmp_timeout_nlattr_to_obj(struct nlattr *tb[], struct net *net, void *data) { unsigned int *timeout = data; - struct nf_icmp_net *in = icmp_pernet(net); + struct nf_icmp_net *in = nf_icmp_pernet(net); if (tb[CTA_TIMEOUT_ICMP_TIMEOUT]) { if (!timeout) @@ -337,7 +332,7 @@ static int icmp_kmemdup_sysctl_table(struct nf_proto_net *pn, static int icmp_init_net(struct net *net) { - struct nf_icmp_net *in = icmp_pernet(net); + struct nf_icmp_net *in = nf_icmp_pernet(net); struct nf_proto_net *pn = &in->pn; in->timeout = nf_ct_icmp_timeout; diff --git a/net/netfilter/nf_conntrack_proto_icmpv6.c b/net/netfilter/nf_conntrack_proto_icmpv6.c index 378618feed5da..a15eefb8e3173 100644 --- a/net/netfilter/nf_conntrack_proto_icmpv6.c +++ b/net/netfilter/nf_conntrack_proto_icmpv6.c @@ -30,11 +30,6 @@ static const unsigned int nf_ct_icmpv6_timeout = 30*HZ; -static inline struct nf_icmp_net *icmpv6_pernet(struct net *net) -{ - return &net->ct.nf_ct_proto.icmpv6; -} - static bool icmpv6_pkt_to_tuple(const struct sk_buff *skb, unsigned int dataoff, struct net *net, @@ -87,7 +82,7 @@ static bool icmpv6_invert_tuple(struct nf_conntrack_tuple *tuple, static unsigned int *icmpv6_get_timeouts(struct net *net) { - return &icmpv6_pernet(net)->timeout; + return &nf_icmpv6_pernet(net)->timeout; } /* Returns verdict for packet, or -1 for invalid. */ @@ -286,7 +281,7 @@ static int icmpv6_timeout_nlattr_to_obj(struct nlattr *tb[], struct net *net, void *data) { unsigned int *timeout = data; - struct nf_icmp_net *in = icmpv6_pernet(net); + struct nf_icmp_net *in = nf_icmpv6_pernet(net); if (!timeout) timeout = icmpv6_get_timeouts(net); @@ -348,7 +343,7 @@ static int icmpv6_kmemdup_sysctl_table(struct nf_proto_net *pn, static int icmpv6_init_net(struct net *net) { - struct nf_icmp_net *in = icmpv6_pernet(net); + struct nf_icmp_net *in = nf_icmpv6_pernet(net); struct nf_proto_net *pn = &in->pn; in->timeout = nf_ct_icmpv6_timeout; diff --git a/net/netfilter/nf_conntrack_proto_sctp.c b/net/netfilter/nf_conntrack_proto_sctp.c index 3d719d3eb9a38..d53e3e78f6052 100644 --- a/net/netfilter/nf_conntrack_proto_sctp.c +++ b/net/netfilter/nf_conntrack_proto_sctp.c @@ -146,11 +146,6 @@ static const u8 sctp_conntracks[2][11][SCTP_CONNTRACK_MAX] = { } }; -static inline struct nf_sctp_net *sctp_pernet(struct net *net) -{ - return &net->ct.nf_ct_proto.sctp; -} - #ifdef CONFIG_NF_CONNTRACK_PROCFS /* Print out the private part of the conntrack. */ static void sctp_print_conntrack(struct seq_file *s, struct nf_conn *ct) @@ -480,7 +475,7 @@ static int sctp_packet(struct nf_conn *ct, timeouts = nf_ct_timeout_lookup(ct); if (!timeouts) - timeouts = sctp_pernet(nf_ct_net(ct))->timeouts; + timeouts = nf_sctp_pernet(nf_ct_net(ct))->timeouts; nf_ct_refresh_acct(ct, ctinfo, skb, timeouts[new_state]); @@ -599,7 +594,7 @@ static int sctp_timeout_nlattr_to_obj(struct nlattr *tb[], struct net *net, void *data) { unsigned int *timeouts = data; - struct nf_sctp_net *sn = sctp_pernet(net); + struct nf_sctp_net *sn = nf_sctp_pernet(net); int i; /* set default SCTP timeouts. */ @@ -736,7 +731,7 @@ static int sctp_kmemdup_sysctl_table(struct nf_proto_net *pn, static int sctp_init_net(struct net *net) { - struct nf_sctp_net *sn = sctp_pernet(net); + struct nf_sctp_net *sn = nf_sctp_pernet(net); struct nf_proto_net *pn = &sn->pn; if (!pn->users) { diff --git a/net/netfilter/nf_conntrack_proto_tcp.c b/net/netfilter/nf_conntrack_proto_tcp.c index 1bcf9984d45e8..4dcbd51a8e97f 100644 --- a/net/netfilter/nf_conntrack_proto_tcp.c +++ b/net/netfilter/nf_conntrack_proto_tcp.c @@ -272,11 +272,6 @@ static const u8 tcp_conntracks[2][6][TCP_CONNTRACK_MAX] = { } }; -static inline struct nf_tcp_net *tcp_pernet(struct net *net) -{ - return &net->ct.nf_ct_proto.tcp; -} - #ifdef CONFIG_NF_CONNTRACK_PROCFS /* Print out the private part of the conntrack. */ static void tcp_print_conntrack(struct seq_file *s, struct nf_conn *ct) @@ -475,7 +470,7 @@ static bool tcp_in_window(const struct nf_conn *ct, const struct tcphdr *tcph) { struct net *net = nf_ct_net(ct); - struct nf_tcp_net *tn = tcp_pernet(net); + struct nf_tcp_net *tn = nf_tcp_pernet(net); struct ip_ct_tcp_state *sender = &state->seen[dir]; struct ip_ct_tcp_state *receiver = &state->seen[!dir]; const struct nf_conntrack_tuple *tuple = &ct->tuplehash[dir].tuple; @@ -767,7 +762,7 @@ static noinline bool tcp_new(struct nf_conn *ct, const struct sk_buff *skb, { enum tcp_conntrack new_state; struct net *net = nf_ct_net(ct); - const struct nf_tcp_net *tn = tcp_pernet(net); + const struct nf_tcp_net *tn = nf_tcp_pernet(net); const struct ip_ct_tcp_state *sender = &ct->proto.tcp.seen[0]; const struct ip_ct_tcp_state *receiver = &ct->proto.tcp.seen[1]; @@ -841,7 +836,7 @@ static int tcp_packet(struct nf_conn *ct, const struct nf_hook_state *state) { struct net *net = nf_ct_net(ct); - struct nf_tcp_net *tn = tcp_pernet(net); + struct nf_tcp_net *tn = nf_tcp_pernet(net); struct nf_conntrack_tuple *tuple; enum tcp_conntrack new_state, old_state; unsigned int index, *timeouts; @@ -1283,7 +1278,7 @@ static unsigned int tcp_nlattr_tuple_size(void) static int tcp_timeout_nlattr_to_obj(struct nlattr *tb[], struct net *net, void *data) { - struct nf_tcp_net *tn = tcp_pernet(net); + struct nf_tcp_net *tn = nf_tcp_pernet(net); unsigned int *timeouts = data; int i; @@ -1508,7 +1503,7 @@ static int tcp_kmemdup_sysctl_table(struct nf_proto_net *pn, static int tcp_init_net(struct net *net) { - struct nf_tcp_net *tn = tcp_pernet(net); + struct nf_tcp_net *tn = nf_tcp_pernet(net); struct nf_proto_net *pn = &tn->pn; if (!pn->users) { diff --git a/net/netfilter/nf_conntrack_proto_udp.c b/net/netfilter/nf_conntrack_proto_udp.c index a7aa70370913c..c879d8d78cfde 100644 --- a/net/netfilter/nf_conntrack_proto_udp.c +++ b/net/netfilter/nf_conntrack_proto_udp.c @@ -32,14 +32,9 @@ static const unsigned int udp_timeouts[UDP_CT_MAX] = { [UDP_CT_REPLIED] = 180*HZ, }; -static inline struct nf_udp_net *udp_pernet(struct net *net) -{ - return &net->ct.nf_ct_proto.udp; -} - static unsigned int *udp_get_timeouts(struct net *net) { - return udp_pernet(net)->timeouts; + return nf_udp_pernet(net)->timeouts; } static void udp_error_log(const struct sk_buff *skb, @@ -212,7 +207,7 @@ static int udp_timeout_nlattr_to_obj(struct nlattr *tb[], struct net *net, void *data) { unsigned int *timeouts = data; - struct nf_udp_net *un = udp_pernet(net); + struct nf_udp_net *un = nf_udp_pernet(net); if (!timeouts) timeouts = un->timeouts; @@ -292,7 +287,7 @@ static int udp_kmemdup_sysctl_table(struct nf_proto_net *pn, static int udp_init_net(struct net *net) { - struct nf_udp_net *un = udp_pernet(net); + struct nf_udp_net *un = nf_udp_pernet(net); struct nf_proto_net *pn = &un->pn; if (!pn->users) { From 8866df9264a34e675b4ee8a151db819b87cce2d3 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso <pablo@netfilter.org> Date: Fri, 2 Nov 2018 00:14:00 +0100 Subject: [PATCH 051/443] netfilter: nfnetlink_cttimeout: pass default timeout policy to obj_to_nlattr Otherwise, we hit a NULL pointer deference since handlers always assume default timeout policy is passed. netlink: 24 bytes leftover after parsing attributes in process `syz-executor2'. kasan: CONFIG_KASAN_INLINE enabled kasan: GPF could be caused by NULL-ptr deref or user memory access general protection fault: 0000 [#1] PREEMPT SMP KASAN CPU: 0 PID: 9575 Comm: syz-executor1 Not tainted 4.19.0+ #312 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 RIP: 0010:icmp_timeout_obj_to_nlattr+0x77/0x170 net/netfilter/nf_conntrack_proto_icmp.c:297 Fixes: c779e849608a ("netfilter: conntrack: remove get_timeout() indirection") Reported-by: Eric Dumazet <eric.dumazet@gmail.com> Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org> --- net/netfilter/nfnetlink_cttimeout.c | 47 ++++++++++++++++++++++++----- 1 file changed, 40 insertions(+), 7 deletions(-) diff --git a/net/netfilter/nfnetlink_cttimeout.c b/net/netfilter/nfnetlink_cttimeout.c index e7a50af1b3d61..a518eb162344e 100644 --- a/net/netfilter/nfnetlink_cttimeout.c +++ b/net/netfilter/nfnetlink_cttimeout.c @@ -382,7 +382,8 @@ static int cttimeout_default_set(struct net *net, struct sock *ctnl, static int cttimeout_default_fill_info(struct net *net, struct sk_buff *skb, u32 portid, u32 seq, u32 type, int event, u16 l3num, - const struct nf_conntrack_l4proto *l4proto) + const struct nf_conntrack_l4proto *l4proto, + const unsigned int *timeouts) { struct nlmsghdr *nlh; struct nfgenmsg *nfmsg; @@ -408,7 +409,7 @@ cttimeout_default_fill_info(struct net *net, struct sk_buff *skb, u32 portid, if (!nest_parms) goto nla_put_failure; - ret = l4proto->ctnl_timeout.obj_to_nlattr(skb, NULL); + ret = l4proto->ctnl_timeout.obj_to_nlattr(skb, timeouts); if (ret < 0) goto nla_put_failure; @@ -430,6 +431,7 @@ static int cttimeout_default_get(struct net *net, struct sock *ctnl, struct netlink_ext_ack *extack) { const struct nf_conntrack_l4proto *l4proto; + unsigned int *timeouts = NULL; struct sk_buff *skb2; int ret, err; __u16 l3num; @@ -442,12 +444,44 @@ static int cttimeout_default_get(struct net *net, struct sock *ctnl, l4num = nla_get_u8(cda[CTA_TIMEOUT_L4PROTO]); l4proto = nf_ct_l4proto_find_get(l4num); - /* This protocol is not supported, skip. */ - if (l4proto->l4proto != l4num) { - err = -EOPNOTSUPP; + err = -EOPNOTSUPP; + if (l4proto->l4proto != l4num) goto err; + + switch (l4proto->l4proto) { + case IPPROTO_ICMP: + timeouts = &nf_icmp_pernet(net)->timeout; + break; + case IPPROTO_TCP: + timeouts = nf_tcp_pernet(net)->timeouts; + break; + case IPPROTO_UDP: + timeouts = nf_udp_pernet(net)->timeouts; + break; + case IPPROTO_DCCP: +#ifdef CONFIG_NF_CT_PROTO_DCCP + timeouts = nf_dccp_pernet(net)->dccp_timeout; +#endif + break; + case IPPROTO_ICMPV6: + timeouts = &nf_icmpv6_pernet(net)->timeout; + break; + case IPPROTO_SCTP: +#ifdef CONFIG_NF_CT_PROTO_SCTP + timeouts = nf_sctp_pernet(net)->timeouts; +#endif + break; + case 255: + timeouts = &nf_generic_pernet(net)->timeout; + break; + default: + WARN_ON_ONCE(1); + break; } + if (!timeouts) + goto err; + skb2 = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); if (skb2 == NULL) { err = -ENOMEM; @@ -458,8 +492,7 @@ static int cttimeout_default_get(struct net *net, struct sock *ctnl, nlh->nlmsg_seq, NFNL_MSG_TYPE(nlh->nlmsg_type), IPCTNL_MSG_TIMEOUT_DEFAULT_SET, - l3num, - l4proto); + l3num, l4proto, timeouts); if (ret <= 0) { kfree_skb(skb2); err = -ENOMEM; From e4844c9c62a0fe47980d6c3d4b7a096a5d755925 Mon Sep 17 00:00:00 2001 From: Florian Westphal <fw@strlen.de> Date: Fri, 2 Nov 2018 11:33:37 +0100 Subject: [PATCH 052/443] netfilter: nft_compat: ebtables 'nat' table is normal chain type Unlike ip(6)tables, the ebtables nat table has no special properties. This bug causes 'ebtables -A' to fail when using a target such as 'snat' (ebt_snat target sets ".table = "nat"'). Targets that have no table restrictions work fine. Signed-off-by: Florian Westphal <fw@strlen.de> Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org> --- net/netfilter/nft_compat.c | 21 ++++++++++++--------- 1 file changed, 12 insertions(+), 9 deletions(-) diff --git a/net/netfilter/nft_compat.c b/net/netfilter/nft_compat.c index 768292eac2a46..9d0ede4742240 100644 --- a/net/netfilter/nft_compat.c +++ b/net/netfilter/nft_compat.c @@ -54,9 +54,11 @@ static bool nft_xt_put(struct nft_xt *xt) return false; } -static int nft_compat_chain_validate_dependency(const char *tablename, - const struct nft_chain *chain) +static int nft_compat_chain_validate_dependency(const struct nft_ctx *ctx, + const char *tablename) { + enum nft_chain_types type = NFT_CHAIN_T_DEFAULT; + const struct nft_chain *chain = ctx->chain; const struct nft_base_chain *basechain; if (!tablename || @@ -64,9 +66,12 @@ static int nft_compat_chain_validate_dependency(const char *tablename, return 0; basechain = nft_base_chain(chain); - if (strcmp(tablename, "nat") == 0 && - basechain->type->type != NFT_CHAIN_T_NAT) - return -EINVAL; + if (strcmp(tablename, "nat") == 0) { + if (ctx->family != NFPROTO_BRIDGE) + type = NFT_CHAIN_T_NAT; + if (basechain->type->type != type) + return -EINVAL; + } return 0; } @@ -342,8 +347,7 @@ static int nft_target_validate(const struct nft_ctx *ctx, if (target->hooks && !(hook_mask & target->hooks)) return -EINVAL; - ret = nft_compat_chain_validate_dependency(target->table, - ctx->chain); + ret = nft_compat_chain_validate_dependency(ctx, target->table); if (ret < 0) return ret; } @@ -590,8 +594,7 @@ static int nft_match_validate(const struct nft_ctx *ctx, if (match->hooks && !(hook_mask & match->hooks)) return -EINVAL; - ret = nft_compat_chain_validate_dependency(match->table, - ctx->chain); + ret = nft_compat_chain_validate_dependency(ctx, match->table); if (ret < 0) return ret; } From f393808dc64149ccd0e5a8427505ba2974a59854 Mon Sep 17 00:00:00 2001 From: Vasily Khoruzhick <vasilykh@arista.com> Date: Thu, 25 Oct 2018 12:15:43 -0700 Subject: [PATCH 053/443] netfilter: conntrack: fix calculation of next bucket number in early_drop If there's no entry to drop in bucket that corresponds to the hash, early_drop() should look for it in other buckets. But since it increments hash instead of bucket number, it actually looks in the same bucket 8 times: hsize is 16k by default (14 bits) and hash is 32-bit value, so reciprocal_scale(hash, hsize) returns the same value for hash..hash+7 in most cases. Fix it by increasing bucket number instead of hash and rename _hash to bucket to avoid future confusion. Fixes: 3e86638e9a0b ("netfilter: conntrack: consider ct netns in early_drop logic") Cc: <stable@vger.kernel.org> # v4.7+ Signed-off-by: Vasily Khoruzhick <vasilykh@arista.com> Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org> --- net/netfilter/nf_conntrack_core.c | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index ca1168d67fac6..e92e749aff53e 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -1073,19 +1073,22 @@ static unsigned int early_drop_list(struct net *net, return drops; } -static noinline int early_drop(struct net *net, unsigned int _hash) +static noinline int early_drop(struct net *net, unsigned int hash) { - unsigned int i; + unsigned int i, bucket; for (i = 0; i < NF_CT_EVICTION_RANGE; i++) { struct hlist_nulls_head *ct_hash; - unsigned int hash, hsize, drops; + unsigned int hsize, drops; rcu_read_lock(); nf_conntrack_get_ht(&ct_hash, &hsize); - hash = reciprocal_scale(_hash++, hsize); + if (!i) + bucket = reciprocal_scale(hash, hsize); + else + bucket = (bucket + 1) % hsize; - drops = early_drop_list(net, &ct_hash[hash]); + drops = early_drop_list(net, &ct_hash[bucket]); rcu_read_unlock(); if (drops) { From e19e5be8b4cafa8b3f8b0cd1b1dfe20fa0145b83 Mon Sep 17 00:00:00 2001 From: Julian Wiedmann <jwi@linux.ibm.com> Date: Fri, 2 Nov 2018 19:04:08 +0100 Subject: [PATCH 054/443] s390/qeth: sanitize strings in debug messages As Documentation/s390/s390dbf.txt states quite clearly, using any pointer in sprinf-formatted s390dbf debug entries is dangerous. The pointers are dereferenced whenever the trace file is read from. So if the referenced data has a shorter life-time than the trace file, any read operation can result in a use-after-free. So rip out all hazardous use of indirect data, and replace any usage of dev_name() and such by the Bus ID number. Signed-off-by: Julian Wiedmann <jwi@linux.ibm.com> Signed-off-by: David S. Miller <davem@davemloft.net> --- drivers/s390/net/qeth_core.h | 15 +++- drivers/s390/net/qeth_core_main.c | 127 ++++++++++++++---------------- drivers/s390/net/qeth_l2_main.c | 24 +++--- drivers/s390/net/qeth_l3_main.c | 104 ++++++++---------------- 4 files changed, 119 insertions(+), 151 deletions(-) diff --git a/drivers/s390/net/qeth_core.h b/drivers/s390/net/qeth_core.h index 6843bc7ee9f24..884ba9dfb3416 100644 --- a/drivers/s390/net/qeth_core.h +++ b/drivers/s390/net/qeth_core.h @@ -87,6 +87,18 @@ struct qeth_dbf_info { #define SENSE_RESETTING_EVENT_BYTE 1 #define SENSE_RESETTING_EVENT_FLAG 0x80 +static inline u32 qeth_get_device_id(struct ccw_device *cdev) +{ + struct ccw_dev_id dev_id; + u32 id; + + ccw_device_get_id(cdev, &dev_id); + id = dev_id.devno; + id |= (u32) (dev_id.ssid << 16); + + return id; +} + /* * Common IO related definitions */ @@ -97,7 +109,8 @@ struct qeth_dbf_info { #define CARD_RDEV_ID(card) dev_name(&card->read.ccwdev->dev) #define CARD_WDEV_ID(card) dev_name(&card->write.ccwdev->dev) #define CARD_DDEV_ID(card) dev_name(&card->data.ccwdev->dev) -#define CHANNEL_ID(channel) dev_name(&channel->ccwdev->dev) +#define CCW_DEVID(cdev) (qeth_get_device_id(cdev)) +#define CARD_DEVID(card) (CCW_DEVID(CARD_RDEV(card))) /** * card stuff diff --git a/drivers/s390/net/qeth_core_main.c b/drivers/s390/net/qeth_core_main.c index 3274f13aad576..639ac0aca1e9d 100644 --- a/drivers/s390/net/qeth_core_main.c +++ b/drivers/s390/net/qeth_core_main.c @@ -554,8 +554,8 @@ static int __qeth_issue_next_read(struct qeth_card *card) if (!iob) { dev_warn(&card->gdev->dev, "The qeth device driver " "failed to recover an error on the device\n"); - QETH_DBF_MESSAGE(2, "%s issue_next_read failed: no iob " - "available\n", dev_name(&card->gdev->dev)); + QETH_DBF_MESSAGE(2, "issue_next_read on device %x failed: no iob available\n", + CARD_DEVID(card)); return -ENOMEM; } qeth_setup_ccw(channel->ccw, CCW_CMD_READ, QETH_BUFSIZE, iob->data); @@ -563,8 +563,8 @@ static int __qeth_issue_next_read(struct qeth_card *card) rc = ccw_device_start(channel->ccwdev, channel->ccw, (addr_t) iob, 0, 0); if (rc) { - QETH_DBF_MESSAGE(2, "%s error in starting next read ccw! " - "rc=%i\n", dev_name(&card->gdev->dev), rc); + QETH_DBF_MESSAGE(2, "error %i on device %x when starting next read ccw!\n", + rc, CARD_DEVID(card)); atomic_set(&channel->irq_pending, 0); card->read_or_write_problem = 1; qeth_schedule_recovery(card); @@ -613,16 +613,14 @@ static void qeth_issue_ipa_msg(struct qeth_ipa_cmd *cmd, int rc, const char *ipa_name; int com = cmd->hdr.command; ipa_name = qeth_get_ipa_cmd_name(com); + if (rc) - QETH_DBF_MESSAGE(2, "IPA: %s(x%X) for %s/%s returned " - "x%X \"%s\"\n", - ipa_name, com, dev_name(&card->gdev->dev), - QETH_CARD_IFNAME(card), rc, - qeth_get_ipa_msg(rc)); + QETH_DBF_MESSAGE(2, "IPA: %s(%#x) for device %x returned %#x \"%s\"\n", + ipa_name, com, CARD_DEVID(card), rc, + qeth_get_ipa_msg(rc)); else - QETH_DBF_MESSAGE(5, "IPA: %s(x%X) for %s/%s succeeded\n", - ipa_name, com, dev_name(&card->gdev->dev), - QETH_CARD_IFNAME(card)); + QETH_DBF_MESSAGE(5, "IPA: %s(%#x) for device %x succeeded\n", + ipa_name, com, CARD_DEVID(card)); } static struct qeth_ipa_cmd *qeth_check_ipa_data(struct qeth_card *card, @@ -711,7 +709,7 @@ static int qeth_check_idx_response(struct qeth_card *card, QETH_DBF_HEX(CTRL, 2, buffer, QETH_DBF_CTRL_LEN); if ((buffer[2] & 0xc0) == 0xc0) { - QETH_DBF_MESSAGE(2, "received an IDX TERMINATE with cause code %#02x\n", + QETH_DBF_MESSAGE(2, "received an IDX TERMINATE with cause code %#04x\n", buffer[4]); QETH_CARD_TEXT(card, 2, "ckidxres"); QETH_CARD_TEXT(card, 2, " idxterm"); @@ -972,8 +970,8 @@ static int qeth_get_problem(struct qeth_card *card, struct ccw_device *cdev, QETH_CARD_TEXT(card, 2, "CGENCHK"); dev_warn(&cdev->dev, "The qeth device driver " "failed to recover an error on the device\n"); - QETH_DBF_MESSAGE(2, "%s check on device dstat=x%x, cstat=x%x\n", - dev_name(&cdev->dev), dstat, cstat); + QETH_DBF_MESSAGE(2, "check on channel %x with dstat=%#x, cstat=%#x\n", + CCW_DEVID(cdev), dstat, cstat); print_hex_dump(KERN_WARNING, "qeth: irb ", DUMP_PREFIX_OFFSET, 16, 1, irb, 64, 1); return 1; @@ -1013,8 +1011,8 @@ static long qeth_check_irb_error(struct qeth_card *card, switch (PTR_ERR(irb)) { case -EIO: - QETH_DBF_MESSAGE(2, "%s i/o-error on device\n", - dev_name(&cdev->dev)); + QETH_DBF_MESSAGE(2, "i/o-error on channel %x\n", + CCW_DEVID(cdev)); QETH_CARD_TEXT(card, 2, "ckirberr"); QETH_CARD_TEXT_(card, 2, " rc%d", -EIO); break; @@ -1031,8 +1029,8 @@ static long qeth_check_irb_error(struct qeth_card *card, } break; default: - QETH_DBF_MESSAGE(2, "%s unknown error %ld on device\n", - dev_name(&cdev->dev), PTR_ERR(irb)); + QETH_DBF_MESSAGE(2, "unknown error %ld on channel %x\n", + PTR_ERR(irb), CCW_DEVID(cdev)); QETH_CARD_TEXT(card, 2, "ckirberr"); QETH_CARD_TEXT(card, 2, " rc???"); } @@ -1114,9 +1112,9 @@ static void qeth_irq(struct ccw_device *cdev, unsigned long intparm, dev_warn(&channel->ccwdev->dev, "The qeth device driver failed to recover " "an error on the device\n"); - QETH_DBF_MESSAGE(2, "%s sense data available. cstat " - "0x%X dstat 0x%X\n", - dev_name(&channel->ccwdev->dev), cstat, dstat); + QETH_DBF_MESSAGE(2, "sense data available on channel %x: cstat %#X dstat %#X\n", + CCW_DEVID(channel->ccwdev), cstat, + dstat); print_hex_dump(KERN_WARNING, "qeth: irb ", DUMP_PREFIX_OFFSET, 16, 1, irb, 32, 1); print_hex_dump(KERN_WARNING, "qeth: sense data ", @@ -1890,8 +1888,8 @@ static int qeth_idx_activate_channel(struct qeth_card *card, if (channel->state != CH_STATE_ACTIVATING) { dev_warn(&channel->ccwdev->dev, "The qeth device driver" " failed to recover an error on the device\n"); - QETH_DBF_MESSAGE(2, "%s IDX activate timed out\n", - dev_name(&channel->ccwdev->dev)); + QETH_DBF_MESSAGE(2, "IDX activate timed out on channel %x\n", + CCW_DEVID(channel->ccwdev)); QETH_DBF_TEXT_(SETUP, 2, "2err%d", -ETIME); return -ETIME; } @@ -1926,17 +1924,15 @@ static void qeth_idx_write_cb(struct qeth_card *card, "The adapter is used exclusively by another " "host\n"); else - QETH_DBF_MESSAGE(2, "%s IDX_ACTIVATE on write channel:" - " negative reply\n", - dev_name(&channel->ccwdev->dev)); + QETH_DBF_MESSAGE(2, "IDX_ACTIVATE on channel %x: negative reply\n", + CCW_DEVID(channel->ccwdev)); goto out; } memcpy(&temp, QETH_IDX_ACT_FUNC_LEVEL(iob->data), 2); if ((temp & ~0x0100) != qeth_peer_func_level(card->info.func_level)) { - QETH_DBF_MESSAGE(2, "%s IDX_ACTIVATE on write channel: " - "function level mismatch (sent: 0x%x, received: " - "0x%x)\n", dev_name(&channel->ccwdev->dev), - card->info.func_level, temp); + QETH_DBF_MESSAGE(2, "IDX_ACTIVATE on channel %x: function level mismatch (sent: %#x, received: %#x)\n", + CCW_DEVID(channel->ccwdev), + card->info.func_level, temp); goto out; } channel->state = CH_STATE_UP; @@ -1973,9 +1969,8 @@ static void qeth_idx_read_cb(struct qeth_card *card, "insufficient authorization\n"); break; default: - QETH_DBF_MESSAGE(2, "%s IDX_ACTIVATE on read channel:" - " negative reply\n", - dev_name(&channel->ccwdev->dev)); + QETH_DBF_MESSAGE(2, "IDX_ACTIVATE on channel %x: negative reply\n", + CCW_DEVID(channel->ccwdev)); } QETH_CARD_TEXT_(card, 2, "idxread%c", QETH_IDX_ACT_CAUSE_CODE(iob->data)); @@ -1984,10 +1979,9 @@ static void qeth_idx_read_cb(struct qeth_card *card, memcpy(&temp, QETH_IDX_ACT_FUNC_LEVEL(iob->data), 2); if (temp != qeth_peer_func_level(card->info.func_level)) { - QETH_DBF_MESSAGE(2, "%s IDX_ACTIVATE on read channel: function " - "level mismatch (sent: 0x%x, received: 0x%x)\n", - dev_name(&channel->ccwdev->dev), - card->info.func_level, temp); + QETH_DBF_MESSAGE(2, "IDX_ACTIVATE on channel %x: function level mismatch (sent: %#x, received: %#x)\n", + CCW_DEVID(channel->ccwdev), + card->info.func_level, temp); goto out; } memcpy(&card->token.issuer_rm_r, @@ -2096,9 +2090,8 @@ int qeth_send_control_data(struct qeth_card *card, int len, (addr_t) iob, 0, 0, event_timeout); spin_unlock_irq(get_ccwdev_lock(channel->ccwdev)); if (rc) { - QETH_DBF_MESSAGE(2, "%s qeth_send_control_data: " - "ccw_device_start rc = %i\n", - dev_name(&channel->ccwdev->dev), rc); + QETH_DBF_MESSAGE(2, "qeth_send_control_data on device %x: ccw_device_start rc = %i\n", + CARD_DEVID(card), rc); QETH_CARD_TEXT_(card, 2, " err%d", rc); spin_lock_irq(&card->lock); list_del_init(&reply->list); @@ -2853,8 +2846,8 @@ struct qeth_cmd_buffer *qeth_get_ipacmd_buffer(struct qeth_card *card, } else { dev_warn(&card->gdev->dev, "The qeth driver ran out of channel command buffers\n"); - QETH_DBF_MESSAGE(1, "%s The qeth driver ran out of channel command buffers", - dev_name(&card->gdev->dev)); + QETH_DBF_MESSAGE(1, "device %x ran out of channel command buffers", + CARD_DEVID(card)); } return iob; @@ -2989,10 +2982,9 @@ static int qeth_query_ipassists_cb(struct qeth_card *card, return 0; default: if (cmd->hdr.return_code) { - QETH_DBF_MESSAGE(1, "%s IPA_CMD_QIPASSIST: Unhandled " - "rc=%d\n", - dev_name(&card->gdev->dev), - cmd->hdr.return_code); + QETH_DBF_MESSAGE(1, "IPA_CMD_QIPASSIST on device %x: Unhandled rc=%#x\n", + CARD_DEVID(card), + cmd->hdr.return_code); return 0; } } @@ -3004,8 +2996,8 @@ static int qeth_query_ipassists_cb(struct qeth_card *card, card->options.ipa6.supported_funcs = cmd->hdr.ipa_supported; card->options.ipa6.enabled_funcs = cmd->hdr.ipa_enabled; } else - QETH_DBF_MESSAGE(1, "%s IPA_CMD_QIPASSIST: Flawed LIC detected" - "\n", dev_name(&card->gdev->dev)); + QETH_DBF_MESSAGE(1, "IPA_CMD_QIPASSIST on device %x: Flawed LIC detected\n", + CARD_DEVID(card)); return 0; } @@ -4297,10 +4289,9 @@ static int qeth_setadpparms_set_access_ctrl_cb(struct qeth_card *card, cmd->data.setadapterparms.hdr.return_code); if (cmd->data.setadapterparms.hdr.return_code != SET_ACCESS_CTRL_RC_SUCCESS) - QETH_DBF_MESSAGE(3, "ERR:SET_ACCESS_CTRL(%s,%d)==%d\n", - card->gdev->dev.kobj.name, - access_ctrl_req->subcmd_code, - cmd->data.setadapterparms.hdr.return_code); + QETH_DBF_MESSAGE(3, "ERR:SET_ACCESS_CTRL(%#x) on device %x: %#x\n", + access_ctrl_req->subcmd_code, CARD_DEVID(card), + cmd->data.setadapterparms.hdr.return_code); switch (cmd->data.setadapterparms.hdr.return_code) { case SET_ACCESS_CTRL_RC_SUCCESS: if (card->options.isolation == ISOLATION_MODE_NONE) { @@ -4312,14 +4303,14 @@ static int qeth_setadpparms_set_access_ctrl_cb(struct qeth_card *card, } break; case SET_ACCESS_CTRL_RC_ALREADY_NOT_ISOLATED: - QETH_DBF_MESSAGE(2, "%s QDIO data connection isolation already " - "deactivated\n", dev_name(&card->gdev->dev)); + QETH_DBF_MESSAGE(2, "QDIO data connection isolation on device %x already deactivated\n", + CARD_DEVID(card)); if (fallback) card->options.isolation = card->options.prev_isolation; break; case SET_ACCESS_CTRL_RC_ALREADY_ISOLATED: - QETH_DBF_MESSAGE(2, "%s QDIO data connection isolation already" - " activated\n", dev_name(&card->gdev->dev)); + QETH_DBF_MESSAGE(2, "QDIO data connection isolation on device %x already activated\n", + CARD_DEVID(card)); if (fallback) card->options.isolation = card->options.prev_isolation; break; @@ -4405,10 +4396,8 @@ int qeth_set_access_ctrl_online(struct qeth_card *card, int fallback) rc = qeth_setadpparms_set_access_ctrl(card, card->options.isolation, fallback); if (rc) { - QETH_DBF_MESSAGE(3, - "IPA(SET_ACCESS_CTRL,%s,%d) sent failed\n", - card->gdev->dev.kobj.name, - rc); + QETH_DBF_MESSAGE(3, "IPA(SET_ACCESS_CTRL(%d) on device %x: sent failed\n", + rc, CARD_DEVID(card)); rc = -EOPNOTSUPP; } } else if (card->options.isolation != ISOLATION_MODE_NONE) { @@ -4634,8 +4623,8 @@ static int qeth_snmp_command(struct qeth_card *card, char __user *udata) rc = qeth_send_ipa_snmp_cmd(card, iob, QETH_SETADP_BASE_LEN + req_len, qeth_snmp_command_cb, (void *)&qinfo); if (rc) - QETH_DBF_MESSAGE(2, "SNMP command failed on %s: (0x%x)\n", - QETH_CARD_IFNAME(card), rc); + QETH_DBF_MESSAGE(2, "SNMP command failed on device %x: (%#x)\n", + CARD_DEVID(card), rc); else { if (copy_to_user(udata, qinfo.udata, qinfo.udata_len)) rc = -EFAULT; @@ -4869,8 +4858,8 @@ static void qeth_determine_capabilities(struct qeth_card *card) rc = qeth_read_conf_data(card, (void **) &prcd, &length); if (rc) { - QETH_DBF_MESSAGE(2, "%s qeth_read_conf_data returned %i\n", - dev_name(&card->gdev->dev), rc); + QETH_DBF_MESSAGE(2, "qeth_read_conf_data on device %x returned %i\n", + CARD_DEVID(card), rc); QETH_DBF_TEXT_(SETUP, 2, "5err%d", rc); goto out_offline; } @@ -5096,8 +5085,8 @@ int qeth_core_hardsetup_card(struct qeth_card *card) qeth_update_from_chp_desc(card); retry: if (retries < 3) - QETH_DBF_MESSAGE(2, "%s Retrying to do IDX activates.\n", - dev_name(&card->gdev->dev)); + QETH_DBF_MESSAGE(2, "Retrying to do IDX activates on device %x.\n", + CARD_DEVID(card)); rc = qeth_qdio_clear_card(card, card->info.type != QETH_CARD_TYPE_IQD); ccw_device_set_offline(CARD_DDEV(card)); ccw_device_set_offline(CARD_WDEV(card)); @@ -5201,8 +5190,8 @@ int qeth_core_hardsetup_card(struct qeth_card *card) out: dev_warn(&card->gdev->dev, "The qeth device driver failed to recover " "an error on the device\n"); - QETH_DBF_MESSAGE(2, "%s Initialization in hardsetup failed! rc=%d\n", - dev_name(&card->gdev->dev), rc); + QETH_DBF_MESSAGE(2, "Initialization for device %x failed in hardsetup! rc=%d\n", + CARD_DEVID(card), rc); return rc; } EXPORT_SYMBOL_GPL(qeth_core_hardsetup_card); diff --git a/drivers/s390/net/qeth_l2_main.c b/drivers/s390/net/qeth_l2_main.c index 23aaf373f631e..5b67fd1f2b778 100644 --- a/drivers/s390/net/qeth_l2_main.c +++ b/drivers/s390/net/qeth_l2_main.c @@ -146,11 +146,11 @@ static int qeth_l2_write_mac(struct qeth_card *card, u8 *mac) QETH_CARD_TEXT(card, 2, "L2Wmac"); rc = qeth_l2_send_setdelmac(card, mac, cmd); if (rc == -EEXIST) - QETH_DBF_MESSAGE(2, "MAC %pM already registered on %s\n", - mac, QETH_CARD_IFNAME(card)); + QETH_DBF_MESSAGE(2, "MAC already registered on device %x\n", + CARD_DEVID(card)); else if (rc) - QETH_DBF_MESSAGE(2, "Failed to register MAC %pM on %s: %d\n", - mac, QETH_CARD_IFNAME(card), rc); + QETH_DBF_MESSAGE(2, "Failed to register MAC on device %x: %d\n", + CARD_DEVID(card), rc); return rc; } @@ -163,8 +163,8 @@ static int qeth_l2_remove_mac(struct qeth_card *card, u8 *mac) QETH_CARD_TEXT(card, 2, "L2Rmac"); rc = qeth_l2_send_setdelmac(card, mac, cmd); if (rc) - QETH_DBF_MESSAGE(2, "Failed to delete MAC %pM on %s: %d\n", - mac, QETH_CARD_IFNAME(card), rc); + QETH_DBF_MESSAGE(2, "Failed to delete MAC on device %u: %d\n", + CARD_DEVID(card), rc); return rc; } @@ -260,9 +260,9 @@ static int qeth_l2_send_setdelvlan_cb(struct qeth_card *card, QETH_CARD_TEXT(card, 2, "L2sdvcb"); if (cmd->hdr.return_code) { - QETH_DBF_MESSAGE(2, "Error in processing VLAN %i on %s: 0x%x.\n", + QETH_DBF_MESSAGE(2, "Error in processing VLAN %u on device %x: %#x.\n", cmd->data.setdelvlan.vlan_id, - QETH_CARD_IFNAME(card), cmd->hdr.return_code); + CARD_DEVID(card), cmd->hdr.return_code); QETH_CARD_TEXT_(card, 2, "L2VL%4x", cmd->hdr.command); QETH_CARD_TEXT_(card, 2, "err%d", cmd->hdr.return_code); } @@ -455,8 +455,8 @@ static int qeth_l2_request_initial_mac(struct qeth_card *card) rc = qeth_vm_request_mac(card); if (!rc) goto out; - QETH_DBF_MESSAGE(2, "z/VM MAC Service failed on device %s: x%x\n", - CARD_BUS_ID(card), rc); + QETH_DBF_MESSAGE(2, "z/VM MAC Service failed on device %x: %#x\n", + CARD_DEVID(card), rc); QETH_DBF_TEXT_(SETUP, 2, "err%04x", rc); /* fall back to alternative mechanism: */ } @@ -468,8 +468,8 @@ static int qeth_l2_request_initial_mac(struct qeth_card *card) rc = qeth_setadpparms_change_macaddr(card); if (!rc) goto out; - QETH_DBF_MESSAGE(2, "READ_MAC Assist failed on device %s: x%x\n", - CARD_BUS_ID(card), rc); + QETH_DBF_MESSAGE(2, "READ_MAC Assist failed on device %x: %#x\n", + CARD_DEVID(card), rc); QETH_DBF_TEXT_(SETUP, 2, "1err%04x", rc); /* fall back once more: */ } diff --git a/drivers/s390/net/qeth_l3_main.c b/drivers/s390/net/qeth_l3_main.c index 0b161cc1fd2e6..ffa2aa1dd4c51 100644 --- a/drivers/s390/net/qeth_l3_main.c +++ b/drivers/s390/net/qeth_l3_main.c @@ -494,9 +494,8 @@ int qeth_l3_setrouting_v4(struct qeth_card *card) QETH_PROT_IPV4); if (rc) { card->options.route4.type = NO_ROUTER; - QETH_DBF_MESSAGE(2, "Error (0x%04x) while setting routing type" - " on %s. Type set to 'no router'.\n", rc, - QETH_CARD_IFNAME(card)); + QETH_DBF_MESSAGE(2, "Error (%#06x) while setting routing type on device %x. Type set to 'no router'.\n", + rc, CARD_DEVID(card)); } return rc; } @@ -518,9 +517,8 @@ int qeth_l3_setrouting_v6(struct qeth_card *card) QETH_PROT_IPV6); if (rc) { card->options.route6.type = NO_ROUTER; - QETH_DBF_MESSAGE(2, "Error (0x%04x) while setting routing type" - " on %s. Type set to 'no router'.\n", rc, - QETH_CARD_IFNAME(card)); + QETH_DBF_MESSAGE(2, "Error (%#06x) while setting routing type on device %x. Type set to 'no router'.\n", + rc, CARD_DEVID(card)); } return rc; } @@ -1070,8 +1068,8 @@ qeth_diags_trace_cb(struct qeth_card *card, struct qeth_reply *reply, } break; default: - QETH_DBF_MESSAGE(2, "Unknown sniffer action (0x%04x) on %s\n", - cmd->data.diagass.action, QETH_CARD_IFNAME(card)); + QETH_DBF_MESSAGE(2, "Unknown sniffer action (%#06x) on device %x\n", + cmd->data.diagass.action, CARD_DEVID(card)); } return 0; @@ -1517,32 +1515,25 @@ static void qeth_l3_set_rx_mode(struct net_device *dev) qeth_l3_handle_promisc_mode(card); } -static const char *qeth_l3_arp_get_error_cause(int *rc) +static int qeth_l3_arp_makerc(int rc) { - switch (*rc) { - case QETH_IPA_ARP_RC_FAILED: - *rc = -EIO; - return "operation failed"; + switch (rc) { + case IPA_RC_SUCCESS: + return 0; case QETH_IPA_ARP_RC_NOTSUPP: - *rc = -EOPNOTSUPP; - return "operation not supported"; - case QETH_IPA_ARP_RC_OUT_OF_RANGE: - *rc = -EINVAL; - return "argument out of range"; case QETH_IPA_ARP_RC_Q_NOTSUPP: - *rc = -EOPNOTSUPP; - return "query operation not supported"; + return -EOPNOTSUPP; + case QETH_IPA_ARP_RC_OUT_OF_RANGE: + return -EINVAL; case QETH_IPA_ARP_RC_Q_NO_DATA: - *rc = -ENOENT; - return "no query data available"; + return -ENOENT; default: - return "unknown error"; + return -EIO; } } static int qeth_l3_arp_set_no_entries(struct qeth_card *card, int no_entries) { - int tmp; int rc; QETH_CARD_TEXT(card, 3, "arpstnoe"); @@ -1560,13 +1551,10 @@ static int qeth_l3_arp_set_no_entries(struct qeth_card *card, int no_entries) rc = qeth_send_simple_setassparms(card, IPA_ARP_PROCESSING, IPA_CMD_ASS_ARP_SET_NO_ENTRIES, no_entries); - if (rc) { - tmp = rc; - QETH_DBF_MESSAGE(2, "Could not set number of ARP entries on " - "%s: %s (0x%x/%d)\n", QETH_CARD_IFNAME(card), - qeth_l3_arp_get_error_cause(&rc), tmp, tmp); - } - return rc; + if (rc) + QETH_DBF_MESSAGE(2, "Could not set number of ARP entries on device %x: %#x\n", + CARD_DEVID(card), rc); + return qeth_l3_arp_makerc(rc); } static __u32 get_arp_entry_size(struct qeth_card *card, @@ -1716,7 +1704,6 @@ static int qeth_l3_query_arp_cache_info(struct qeth_card *card, { struct qeth_cmd_buffer *iob; struct qeth_ipa_cmd *cmd; - int tmp; int rc; QETH_CARD_TEXT_(card, 3, "qarpipv%i", prot); @@ -1735,15 +1722,10 @@ static int qeth_l3_query_arp_cache_info(struct qeth_card *card, rc = qeth_l3_send_ipa_arp_cmd(card, iob, QETH_SETASS_BASE_LEN+QETH_ARP_CMD_LEN, qeth_l3_arp_query_cb, (void *)qinfo); - if (rc) { - tmp = rc; - QETH_DBF_MESSAGE(2, - "Error while querying ARP cache on %s: %s " - "(0x%x/%d)\n", QETH_CARD_IFNAME(card), - qeth_l3_arp_get_error_cause(&rc), tmp, tmp); - } - - return rc; + if (rc) + QETH_DBF_MESSAGE(2, "Error while querying ARP cache on device %x: %#x\n", + CARD_DEVID(card), rc); + return qeth_l3_arp_makerc(rc); } static int qeth_l3_arp_query(struct qeth_card *card, char __user *udata) @@ -1797,8 +1779,6 @@ static int qeth_l3_arp_add_entry(struct qeth_card *card, struct qeth_arp_cache_entry *entry) { struct qeth_cmd_buffer *iob; - char buf[16]; - int tmp; int rc; QETH_CARD_TEXT(card, 3, "arpadent"); @@ -1824,14 +1804,10 @@ static int qeth_l3_arp_add_entry(struct qeth_card *card, sizeof(struct qeth_arp_cache_entry), (unsigned long) entry, qeth_setassparms_cb, NULL); - if (rc) { - tmp = rc; - qeth_l3_ipaddr4_to_string((u8 *)entry->ipaddr, buf); - QETH_DBF_MESSAGE(2, "Could not add ARP entry for address %s " - "on %s: %s (0x%x/%d)\n", buf, QETH_CARD_IFNAME(card), - qeth_l3_arp_get_error_cause(&rc), tmp, tmp); - } - return rc; + if (rc) + QETH_DBF_MESSAGE(2, "Could not add ARP entry on device %x: %#x\n", + CARD_DEVID(card), rc); + return qeth_l3_arp_makerc(rc); } static int qeth_l3_arp_remove_entry(struct qeth_card *card, @@ -1839,7 +1815,6 @@ static int qeth_l3_arp_remove_entry(struct qeth_card *card, { struct qeth_cmd_buffer *iob; char buf[16] = {0, }; - int tmp; int rc; QETH_CARD_TEXT(card, 3, "arprment"); @@ -1864,21 +1839,15 @@ static int qeth_l3_arp_remove_entry(struct qeth_card *card, rc = qeth_send_setassparms(card, iob, 12, (unsigned long)buf, qeth_setassparms_cb, NULL); - if (rc) { - tmp = rc; - memset(buf, 0, 16); - qeth_l3_ipaddr4_to_string((u8 *)entry->ipaddr, buf); - QETH_DBF_MESSAGE(2, "Could not delete ARP entry for address %s" - " on %s: %s (0x%x/%d)\n", buf, QETH_CARD_IFNAME(card), - qeth_l3_arp_get_error_cause(&rc), tmp, tmp); - } - return rc; + if (rc) + QETH_DBF_MESSAGE(2, "Could not delete ARP entry on device %x: %#x\n", + CARD_DEVID(card), rc); + return qeth_l3_arp_makerc(rc); } static int qeth_l3_arp_flush_cache(struct qeth_card *card) { int rc; - int tmp; QETH_CARD_TEXT(card, 3, "arpflush"); @@ -1894,13 +1863,10 @@ static int qeth_l3_arp_flush_cache(struct qeth_card *card) } rc = qeth_send_simple_setassparms(card, IPA_ARP_PROCESSING, IPA_CMD_ASS_ARP_FLUSH_CACHE, 0); - if (rc) { - tmp = rc; - QETH_DBF_MESSAGE(2, "Could not flush ARP cache on %s: %s " - "(0x%x/%d)\n", QETH_CARD_IFNAME(card), - qeth_l3_arp_get_error_cause(&rc), tmp, tmp); - } - return rc; + if (rc) + QETH_DBF_MESSAGE(2, "Could not flush ARP cache on device %x: %#x\n", + CARD_DEVID(card), rc); + return qeth_l3_arp_makerc(rc); } static int qeth_l3_do_ioctl(struct net_device *dev, struct ifreq *rq, int cmd) From bd74a7f9cc033cf4d405788f80292268987dc0c5 Mon Sep 17 00:00:00 2001 From: Julian Wiedmann <jwi@linux.ibm.com> Date: Fri, 2 Nov 2018 19:04:09 +0100 Subject: [PATCH 055/443] s390/qeth: fix HiperSockets sniffer Sniffing mode for L3 HiperSockets requires that no IP addresses are registered with the HW. The preferred way to achieve this is for userspace to delete all the IPs on the interface. But qeth is expected to also tolerate a configuration where that is not the case, by skipping the IP registration when in sniffer mode. Since commit 5f78e29ceebf ("qeth: optimize IP handling in rx_mode callback") reworked the IP registration logic in the L3 subdriver, this no longer works. When the qeth device is set online, qeth_l3_recover_ip() now unconditionally registers all unicast addresses from our internal IP table. While we could fix this particular problem by skipping qeth_l3_recover_ip() on a sniffer device, the more future-proof change is to skip the IP address registration at the lowest level. This way we a) catch any future code path that attempts to register an IP address without considering the sniffer scenario, and b) continue to build up our internal IP table, so that if sniffer mode is switched off later we can operate just like normal. Fixes: 5f78e29ceebf ("qeth: optimize IP handling in rx_mode callback") Signed-off-by: Julian Wiedmann <jwi@linux.ibm.com> Signed-off-by: David S. Miller <davem@davemloft.net> --- drivers/s390/net/qeth_l3_main.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/drivers/s390/net/qeth_l3_main.c b/drivers/s390/net/qeth_l3_main.c index ffa2aa1dd4c51..968e344a240b9 100644 --- a/drivers/s390/net/qeth_l3_main.c +++ b/drivers/s390/net/qeth_l3_main.c @@ -278,9 +278,6 @@ static void qeth_l3_clear_ip_htable(struct qeth_card *card, int recover) QETH_CARD_TEXT(card, 4, "clearip"); - if (recover && card->options.sniffer) - return; - spin_lock_bh(&card->ip_lock); hash_for_each_safe(card->ip_htable, i, tmp, addr, hnode) { @@ -661,6 +658,8 @@ static int qeth_l3_register_addr_entry(struct qeth_card *card, int rc = 0; int cnt = 3; + if (card->options.sniffer) + return 0; if (addr->proto == QETH_PROT_IPV4) { QETH_CARD_TEXT(card, 2, "setaddr4"); @@ -695,6 +694,9 @@ static int qeth_l3_deregister_addr_entry(struct qeth_card *card, { int rc = 0; + if (card->options.sniffer) + return 0; + if (addr->proto == QETH_PROT_IPV4) { QETH_CARD_TEXT(card, 2, "deladdr4"); QETH_CARD_HEX(card, 3, &addr->u.a4.addr, sizeof(int)); From 30356d08159d7899438e94503ae322a8b881e205 Mon Sep 17 00:00:00 2001 From: Julian Wiedmann <jwi@linux.ibm.com> Date: Fri, 2 Nov 2018 19:04:10 +0100 Subject: [PATCH 056/443] s390/qeth: unregister netdevice only when registered qeth only registers its netdevice when the qeth device is first set online. Thus a device that has never been set online will trigger a WARN ("network todo 'hsi%d' but state 0") in unregister_netdev() when removed. Fix this by protecting the unregister step, just like we already protect against repeated registering of the netdevice. Fixes: d3d1b205e89f ("s390/qeth: allocate netdevice early") Reported-by: Karsten Graul <kgraul@linux.ibm.com> Signed-off-by: Julian Wiedmann <jwi@linux.ibm.com> Signed-off-by: David S. Miller <davem@davemloft.net> --- drivers/s390/net/qeth_core.h | 5 +++++ drivers/s390/net/qeth_l2_main.c | 5 +++-- drivers/s390/net/qeth_l3_main.c | 5 +++-- 3 files changed, 11 insertions(+), 4 deletions(-) diff --git a/drivers/s390/net/qeth_core.h b/drivers/s390/net/qeth_core.h index 884ba9dfb3416..b3a0b8838d2f2 100644 --- a/drivers/s390/net/qeth_core.h +++ b/drivers/s390/net/qeth_core.h @@ -843,6 +843,11 @@ struct qeth_trap_id { /*some helper functions*/ #define QETH_CARD_IFNAME(card) (((card)->dev)? (card)->dev->name : "") +static inline bool qeth_netdev_is_registered(struct net_device *dev) +{ + return dev->netdev_ops != NULL; +} + static inline void qeth_scrub_qdio_buffer(struct qdio_buffer *buf, unsigned int elements) { diff --git a/drivers/s390/net/qeth_l2_main.c b/drivers/s390/net/qeth_l2_main.c index 5b67fd1f2b778..2b978eba7e30c 100644 --- a/drivers/s390/net/qeth_l2_main.c +++ b/drivers/s390/net/qeth_l2_main.c @@ -826,7 +826,8 @@ static void qeth_l2_remove_device(struct ccwgroup_device *cgdev) if (cgdev->state == CCWGROUP_ONLINE) qeth_l2_set_offline(cgdev); - unregister_netdev(card->dev); + if (qeth_netdev_is_registered(card->dev)) + unregister_netdev(card->dev); } static const struct ethtool_ops qeth_l2_ethtool_ops = { @@ -866,7 +867,7 @@ static int qeth_l2_setup_netdev(struct qeth_card *card) { int rc; - if (card->dev->netdev_ops) + if (qeth_netdev_is_registered(card->dev)) return 0; card->dev->priv_flags |= IFF_UNICAST_FLT; diff --git a/drivers/s390/net/qeth_l3_main.c b/drivers/s390/net/qeth_l3_main.c index 968e344a240b9..a719c5ec41710 100644 --- a/drivers/s390/net/qeth_l3_main.c +++ b/drivers/s390/net/qeth_l3_main.c @@ -2356,7 +2356,7 @@ static int qeth_l3_setup_netdev(struct qeth_card *card) unsigned int headroom; int rc; - if (card->dev->netdev_ops) + if (qeth_netdev_is_registered(card->dev)) return 0; if (card->info.type == QETH_CARD_TYPE_OSD || @@ -2465,7 +2465,8 @@ static void qeth_l3_remove_device(struct ccwgroup_device *cgdev) if (cgdev->state == CCWGROUP_ONLINE) qeth_l3_set_offline(cgdev); - unregister_netdev(card->dev); + if (qeth_netdev_is_registered(card->dev)) + unregister_netdev(card->dev); qeth_l3_clear_ip_htable(card, 0); qeth_l3_clear_ipato_list(card); } From 9fae5c3b60396b8586881a0e7c028ae5bcaeaa3f Mon Sep 17 00:00:00 2001 From: Julian Wiedmann <jwi@linux.ibm.com> Date: Fri, 2 Nov 2018 19:04:11 +0100 Subject: [PATCH 057/443] s390/qeth: fix initial operstate Setting the carrier 'on' for an unregistered netdevice doesn't update its operstate. Fix this by delaying the update until the netdevice has been registered. Fixes: 91cc98f51e3d ("s390/qeth: remove duplicated carrier state tracking") Signed-off-by: Julian Wiedmann <jwi@linux.ibm.com> Signed-off-by: David S. Miller <davem@davemloft.net> --- drivers/s390/net/qeth_core.h | 2 +- drivers/s390/net/qeth_core_main.c | 13 ++++++++++--- drivers/s390/net/qeth_l2_main.c | 10 +++++++--- drivers/s390/net/qeth_l3_main.c | 10 +++++++--- 4 files changed, 25 insertions(+), 10 deletions(-) diff --git a/drivers/s390/net/qeth_core.h b/drivers/s390/net/qeth_core.h index b3a0b8838d2f2..90cb213b0d558 100644 --- a/drivers/s390/net/qeth_core.h +++ b/drivers/s390/net/qeth_core.h @@ -991,7 +991,7 @@ int qeth_wait_for_threads(struct qeth_card *, unsigned long); int qeth_do_run_thread(struct qeth_card *, unsigned long); void qeth_clear_thread_start_bit(struct qeth_card *, unsigned long); void qeth_clear_thread_running_bit(struct qeth_card *, unsigned long); -int qeth_core_hardsetup_card(struct qeth_card *); +int qeth_core_hardsetup_card(struct qeth_card *card, bool *carrier_ok); void qeth_print_status_message(struct qeth_card *); int qeth_init_qdio_queues(struct qeth_card *); int qeth_send_ipa_cmd(struct qeth_card *, struct qeth_cmd_buffer *, diff --git a/drivers/s390/net/qeth_core_main.c b/drivers/s390/net/qeth_core_main.c index 639ac0aca1e9d..aed1a7961553c 100644 --- a/drivers/s390/net/qeth_core_main.c +++ b/drivers/s390/net/qeth_core_main.c @@ -5075,7 +5075,7 @@ static struct ccw_driver qeth_ccw_driver = { .remove = ccwgroup_remove_ccwdev, }; -int qeth_core_hardsetup_card(struct qeth_card *card) +int qeth_core_hardsetup_card(struct qeth_card *card, bool *carrier_ok) { int retries = 3; int rc; @@ -5150,13 +5150,20 @@ int qeth_core_hardsetup_card(struct qeth_card *card) if (rc == IPA_RC_LAN_OFFLINE) { dev_warn(&card->gdev->dev, "The LAN is offline\n"); - netif_carrier_off(card->dev); + *carrier_ok = false; } else { rc = -ENODEV; goto out; } } else { - netif_carrier_on(card->dev); + *carrier_ok = true; + } + + if (qeth_netdev_is_registered(card->dev)) { + if (*carrier_ok) + netif_carrier_on(card->dev); + else + netif_carrier_off(card->dev); } card->options.ipa4.supported_funcs = 0; diff --git a/drivers/s390/net/qeth_l2_main.c b/drivers/s390/net/qeth_l2_main.c index 2b978eba7e30c..2914a1a69f830 100644 --- a/drivers/s390/net/qeth_l2_main.c +++ b/drivers/s390/net/qeth_l2_main.c @@ -863,7 +863,7 @@ static const struct net_device_ops qeth_l2_netdev_ops = { .ndo_set_features = qeth_set_features }; -static int qeth_l2_setup_netdev(struct qeth_card *card) +static int qeth_l2_setup_netdev(struct qeth_card *card, bool carrier_ok) { int rc; @@ -920,6 +920,9 @@ static int qeth_l2_setup_netdev(struct qeth_card *card) qeth_l2_request_initial_mac(card); netif_napi_add(card->dev, &card->napi, qeth_poll, QETH_NAPI_WEIGHT); rc = register_netdev(card->dev); + if (!rc && carrier_ok) + netif_carrier_on(card->dev); + if (rc) card->dev->netdev_ops = NULL; return rc; @@ -950,6 +953,7 @@ static int __qeth_l2_set_online(struct ccwgroup_device *gdev, int recovery_mode) struct qeth_card *card = dev_get_drvdata(&gdev->dev); int rc = 0; enum qeth_card_states recover_flag; + bool carrier_ok; mutex_lock(&card->discipline_mutex); mutex_lock(&card->conf_mutex); @@ -957,7 +961,7 @@ static int __qeth_l2_set_online(struct ccwgroup_device *gdev, int recovery_mode) QETH_DBF_HEX(SETUP, 2, &card, sizeof(void *)); recover_flag = card->state; - rc = qeth_core_hardsetup_card(card); + rc = qeth_core_hardsetup_card(card, &carrier_ok); if (rc) { QETH_DBF_TEXT_(SETUP, 2, "2err%04x", rc); rc = -ENODEV; @@ -968,7 +972,7 @@ static int __qeth_l2_set_online(struct ccwgroup_device *gdev, int recovery_mode) dev_info(&card->gdev->dev, "The device represents a Bridge Capable Port\n"); - rc = qeth_l2_setup_netdev(card); + rc = qeth_l2_setup_netdev(card, carrier_ok); if (rc) goto out_remove; diff --git a/drivers/s390/net/qeth_l3_main.c b/drivers/s390/net/qeth_l3_main.c index a719c5ec41710..b26f7d7a2ca0d 100644 --- a/drivers/s390/net/qeth_l3_main.c +++ b/drivers/s390/net/qeth_l3_main.c @@ -2351,7 +2351,7 @@ static const struct net_device_ops qeth_l3_osa_netdev_ops = { .ndo_neigh_setup = qeth_l3_neigh_setup, }; -static int qeth_l3_setup_netdev(struct qeth_card *card) +static int qeth_l3_setup_netdev(struct qeth_card *card, bool carrier_ok) { unsigned int headroom; int rc; @@ -2425,6 +2425,9 @@ static int qeth_l3_setup_netdev(struct qeth_card *card) netif_napi_add(card->dev, &card->napi, qeth_poll, QETH_NAPI_WEIGHT); rc = register_netdev(card->dev); + if (!rc && carrier_ok) + netif_carrier_on(card->dev); + out: if (rc) card->dev->netdev_ops = NULL; @@ -2476,6 +2479,7 @@ static int __qeth_l3_set_online(struct ccwgroup_device *gdev, int recovery_mode) struct qeth_card *card = dev_get_drvdata(&gdev->dev); int rc = 0; enum qeth_card_states recover_flag; + bool carrier_ok; mutex_lock(&card->discipline_mutex); mutex_lock(&card->conf_mutex); @@ -2483,14 +2487,14 @@ static int __qeth_l3_set_online(struct ccwgroup_device *gdev, int recovery_mode) QETH_DBF_HEX(SETUP, 2, &card, sizeof(void *)); recover_flag = card->state; - rc = qeth_core_hardsetup_card(card); + rc = qeth_core_hardsetup_card(card, &carrier_ok); if (rc) { QETH_DBF_TEXT_(SETUP, 2, "2err%04x", rc); rc = -ENODEV; goto out_remove; } - rc = qeth_l3_setup_netdev(card); + rc = qeth_l3_setup_netdev(card, carrier_ok); if (rc) goto out_remove; From 125d7d30111738a5bdafacc1ed87cd3d7f32b4ea Mon Sep 17 00:00:00 2001 From: Julian Wiedmann <jwi@linux.ibm.com> Date: Fri, 2 Nov 2018 19:04:12 +0100 Subject: [PATCH 058/443] s390/qeth: sanitize ARP requests The ARP_{ADD,REMOVE}_ENTRY cmd structs contain reserved fields. Introduce a common helper that doesn't raw-copy the user-provided data into the cmd, but only sets those fields that are strictly needed for the command. This also sets the correct command length for ARP_REMOVE_ENTRY. Signed-off-by: Julian Wiedmann <jwi@linux.ibm.com> Signed-off-by: David S. Miller <davem@davemloft.net> --- drivers/s390/net/qeth_core.h | 5 -- drivers/s390/net/qeth_core_main.c | 12 ++-- drivers/s390/net/qeth_core_mpc.h | 2 +- drivers/s390/net/qeth_l3_main.c | 94 +++++++++---------------------- 4 files changed, 34 insertions(+), 79 deletions(-) diff --git a/drivers/s390/net/qeth_core.h b/drivers/s390/net/qeth_core.h index 90cb213b0d558..04e294d1d16d7 100644 --- a/drivers/s390/net/qeth_core.h +++ b/drivers/s390/net/qeth_core.h @@ -1046,11 +1046,6 @@ int qeth_configure_cq(struct qeth_card *, enum qeth_cq); int qeth_hw_trap(struct qeth_card *, enum qeth_diags_trap_action); void qeth_trace_features(struct qeth_card *); void qeth_close_dev(struct qeth_card *); -int qeth_send_setassparms(struct qeth_card *, struct qeth_cmd_buffer *, __u16, - long, - int (*reply_cb)(struct qeth_card *, - struct qeth_reply *, unsigned long), - void *); int qeth_setassparms_cb(struct qeth_card *, struct qeth_reply *, unsigned long); struct qeth_cmd_buffer *qeth_get_setassparms_cmd(struct qeth_card *, enum qeth_ipa_funcs, diff --git a/drivers/s390/net/qeth_core_main.c b/drivers/s390/net/qeth_core_main.c index aed1a7961553c..82282b2092d87 100644 --- a/drivers/s390/net/qeth_core_main.c +++ b/drivers/s390/net/qeth_core_main.c @@ -5477,11 +5477,12 @@ struct qeth_cmd_buffer *qeth_get_setassparms_cmd(struct qeth_card *card, } EXPORT_SYMBOL_GPL(qeth_get_setassparms_cmd); -int qeth_send_setassparms(struct qeth_card *card, - struct qeth_cmd_buffer *iob, __u16 len, long data, - int (*reply_cb)(struct qeth_card *, - struct qeth_reply *, unsigned long), - void *reply_param) +static int qeth_send_setassparms(struct qeth_card *card, + struct qeth_cmd_buffer *iob, u16 len, + long data, int (*reply_cb)(struct qeth_card *, + struct qeth_reply *, + unsigned long), + void *reply_param) { int rc; struct qeth_ipa_cmd *cmd; @@ -5497,7 +5498,6 @@ int qeth_send_setassparms(struct qeth_card *card, rc = qeth_send_ipa_cmd(card, iob, reply_cb, reply_param); return rc; } -EXPORT_SYMBOL_GPL(qeth_send_setassparms); int qeth_send_simple_setassparms_prot(struct qeth_card *card, enum qeth_ipa_funcs ipa_func, diff --git a/drivers/s390/net/qeth_core_mpc.h b/drivers/s390/net/qeth_core_mpc.h index e85090467afe0..80c036acf5630 100644 --- a/drivers/s390/net/qeth_core_mpc.h +++ b/drivers/s390/net/qeth_core_mpc.h @@ -436,7 +436,7 @@ struct qeth_ipacmd_setassparms { __u32 flags_32bit; struct qeth_ipa_caps caps; struct qeth_checksum_cmd chksum; - struct qeth_arp_cache_entry add_arp_entry; + struct qeth_arp_cache_entry arp_entry; struct qeth_arp_query_data query_arp; struct qeth_tso_start_data tso; __u8 ip[16]; diff --git a/drivers/s390/net/qeth_l3_main.c b/drivers/s390/net/qeth_l3_main.c index b26f7d7a2ca0d..f08b745c20073 100644 --- a/drivers/s390/net/qeth_l3_main.c +++ b/drivers/s390/net/qeth_l3_main.c @@ -1777,13 +1777,18 @@ static int qeth_l3_arp_query(struct qeth_card *card, char __user *udata) return rc; } -static int qeth_l3_arp_add_entry(struct qeth_card *card, - struct qeth_arp_cache_entry *entry) +static int qeth_l3_arp_modify_entry(struct qeth_card *card, + struct qeth_arp_cache_entry *entry, + enum qeth_arp_process_subcmds arp_cmd) { + struct qeth_arp_cache_entry *cmd_entry; struct qeth_cmd_buffer *iob; int rc; - QETH_CARD_TEXT(card, 3, "arpadent"); + if (arp_cmd == IPA_CMD_ASS_ARP_ADD_ENTRY) + QETH_CARD_TEXT(card, 3, "arpadd"); + else + QETH_CARD_TEXT(card, 3, "arpdel"); /* * currently GuestLAN only supports the ARP assist function @@ -1796,54 +1801,19 @@ static int qeth_l3_arp_add_entry(struct qeth_card *card, return -EOPNOTSUPP; } - iob = qeth_get_setassparms_cmd(card, IPA_ARP_PROCESSING, - IPA_CMD_ASS_ARP_ADD_ENTRY, - sizeof(struct qeth_arp_cache_entry), - QETH_PROT_IPV4); + iob = qeth_get_setassparms_cmd(card, IPA_ARP_PROCESSING, arp_cmd, + sizeof(*cmd_entry), QETH_PROT_IPV4); if (!iob) return -ENOMEM; - rc = qeth_send_setassparms(card, iob, - sizeof(struct qeth_arp_cache_entry), - (unsigned long) entry, - qeth_setassparms_cb, NULL); - if (rc) - QETH_DBF_MESSAGE(2, "Could not add ARP entry on device %x: %#x\n", - CARD_DEVID(card), rc); - return qeth_l3_arp_makerc(rc); -} - -static int qeth_l3_arp_remove_entry(struct qeth_card *card, - struct qeth_arp_cache_entry *entry) -{ - struct qeth_cmd_buffer *iob; - char buf[16] = {0, }; - int rc; - QETH_CARD_TEXT(card, 3, "arprment"); - - /* - * currently GuestLAN only supports the ARP assist function - * IPA_CMD_ASS_ARP_QUERY_INFO, but not IPA_CMD_ASS_ARP_REMOVE_ENTRY; - * thus we say EOPNOTSUPP for this ARP function - */ - if (card->info.guestlan) - return -EOPNOTSUPP; - if (!qeth_is_supported(card, IPA_ARP_PROCESSING)) { - return -EOPNOTSUPP; - } - memcpy(buf, entry, 12); - iob = qeth_get_setassparms_cmd(card, IPA_ARP_PROCESSING, - IPA_CMD_ASS_ARP_REMOVE_ENTRY, - 12, - QETH_PROT_IPV4); - if (!iob) - return -ENOMEM; - rc = qeth_send_setassparms(card, iob, - 12, (unsigned long)buf, - qeth_setassparms_cb, NULL); + cmd_entry = &__ipa_cmd(iob)->data.setassparms.data.arp_entry; + ether_addr_copy(cmd_entry->macaddr, entry->macaddr); + memcpy(cmd_entry->ipaddr, entry->ipaddr, 4); + rc = qeth_send_ipa_cmd(card, iob, qeth_setassparms_cb, NULL); if (rc) - QETH_DBF_MESSAGE(2, "Could not delete ARP entry on device %x: %#x\n", - CARD_DEVID(card), rc); + QETH_DBF_MESSAGE(2, "Could not modify (cmd: %#x) ARP entry on device %x: %#x\n", + arp_cmd, CARD_DEVID(card), rc); + return qeth_l3_arp_makerc(rc); } @@ -1875,6 +1845,7 @@ static int qeth_l3_do_ioctl(struct net_device *dev, struct ifreq *rq, int cmd) { struct qeth_card *card = dev->ml_priv; struct qeth_arp_cache_entry arp_entry; + enum qeth_arp_process_subcmds arp_cmd; int rc = 0; switch (cmd) { @@ -1893,27 +1864,16 @@ static int qeth_l3_do_ioctl(struct net_device *dev, struct ifreq *rq, int cmd) rc = qeth_l3_arp_query(card, rq->ifr_ifru.ifru_data); break; case SIOC_QETH_ARP_ADD_ENTRY: - if (!capable(CAP_NET_ADMIN)) { - rc = -EPERM; - break; - } - if (copy_from_user(&arp_entry, rq->ifr_ifru.ifru_data, - sizeof(struct qeth_arp_cache_entry))) - rc = -EFAULT; - else - rc = qeth_l3_arp_add_entry(card, &arp_entry); - break; case SIOC_QETH_ARP_REMOVE_ENTRY: - if (!capable(CAP_NET_ADMIN)) { - rc = -EPERM; - break; - } - if (copy_from_user(&arp_entry, rq->ifr_ifru.ifru_data, - sizeof(struct qeth_arp_cache_entry))) - rc = -EFAULT; - else - rc = qeth_l3_arp_remove_entry(card, &arp_entry); - break; + if (!capable(CAP_NET_ADMIN)) + return -EPERM; + if (copy_from_user(&arp_entry, rq->ifr_data, sizeof(arp_entry))) + return -EFAULT; + + arp_cmd = (cmd == SIOC_QETH_ARP_ADD_ENTRY) ? + IPA_CMD_ASS_ARP_ADD_ENTRY : + IPA_CMD_ASS_ARP_REMOVE_ENTRY; + return qeth_l3_arp_modify_entry(card, &arp_entry, arp_cmd); case SIOC_QETH_ARP_FLUSH_CACHE: if (!capable(CAP_NET_ADMIN)) { rc = -EPERM; From 54e049c227d9968ff6a7d80aae5fec27b54d39da Mon Sep 17 00:00:00 2001 From: Julian Wiedmann <jwi@linux.ibm.com> Date: Fri, 2 Nov 2018 19:04:13 +0100 Subject: [PATCH 059/443] s390/qeth: report 25Gbit link speed This adds the various identifiers for 25Gbit cards, and wires them up into sysfs and ethtool. Signed-off-by: Julian Wiedmann <jwi@linux.ibm.com> Signed-off-by: David S. Miller <davem@davemloft.net> --- drivers/s390/net/qeth_core_main.c | 20 ++++++++++++++++++-- drivers/s390/net/qeth_core_mpc.h | 2 ++ 2 files changed, 20 insertions(+), 2 deletions(-) diff --git a/drivers/s390/net/qeth_core_main.c b/drivers/s390/net/qeth_core_main.c index 82282b2092d87..4bce5ae65a55c 100644 --- a/drivers/s390/net/qeth_core_main.c +++ b/drivers/s390/net/qeth_core_main.c @@ -167,6 +167,8 @@ const char *qeth_get_cardname_short(struct qeth_card *card) return "OSD_1000"; case QETH_LINK_TYPE_10GBIT_ETH: return "OSD_10GIG"; + case QETH_LINK_TYPE_25GBIT_ETH: + return "OSD_25GIG"; case QETH_LINK_TYPE_LANE_ETH100: return "OSD_FE_LANE"; case QETH_LINK_TYPE_LANE_TR: @@ -4432,7 +4434,8 @@ static int qeth_mdio_read(struct net_device *dev, int phy_id, int regnum) rc = BMCR_FULLDPLX; if ((card->info.link_type != QETH_LINK_TYPE_GBIT_ETH) && (card->info.link_type != QETH_LINK_TYPE_OSN) && - (card->info.link_type != QETH_LINK_TYPE_10GBIT_ETH)) + (card->info.link_type != QETH_LINK_TYPE_10GBIT_ETH) && + (card->info.link_type != QETH_LINK_TYPE_25GBIT_ETH)) rc |= BMCR_SPEED100; break; case MII_BMSR: /* Basic mode status register */ @@ -6166,8 +6169,14 @@ static void qeth_set_cmd_adv_sup(struct ethtool_link_ksettings *cmd, WARN_ON_ONCE(1); } - /* fallthrough from high to low, to select all legal speeds: */ + /* partially does fall through, to also select lower speeds */ switch (maxspeed) { + case SPEED_25000: + ethtool_link_ksettings_add_link_mode(cmd, supported, + 25000baseSR_Full); + ethtool_link_ksettings_add_link_mode(cmd, advertising, + 25000baseSR_Full); + break; case SPEED_10000: ethtool_link_ksettings_add_link_mode(cmd, supported, 10000baseT_Full); @@ -6250,6 +6259,10 @@ int qeth_core_ethtool_get_link_ksettings(struct net_device *netdev, cmd->base.speed = SPEED_10000; cmd->base.port = PORT_FIBRE; break; + case QETH_LINK_TYPE_25GBIT_ETH: + cmd->base.speed = SPEED_25000; + cmd->base.port = PORT_FIBRE; + break; default: cmd->base.speed = SPEED_10; cmd->base.port = PORT_TP; @@ -6316,6 +6329,9 @@ int qeth_core_ethtool_get_link_ksettings(struct net_device *netdev, case CARD_INFO_PORTS_10G: cmd->base.speed = SPEED_10000; break; + case CARD_INFO_PORTS_25G: + cmd->base.speed = SPEED_25000; + break; } return 0; diff --git a/drivers/s390/net/qeth_core_mpc.h b/drivers/s390/net/qeth_core_mpc.h index 80c036acf5630..3e54be201b279 100644 --- a/drivers/s390/net/qeth_core_mpc.h +++ b/drivers/s390/net/qeth_core_mpc.h @@ -90,6 +90,7 @@ enum qeth_link_types { QETH_LINK_TYPE_GBIT_ETH = 0x03, QETH_LINK_TYPE_OSN = 0x04, QETH_LINK_TYPE_10GBIT_ETH = 0x10, + QETH_LINK_TYPE_25GBIT_ETH = 0x12, QETH_LINK_TYPE_LANE_ETH100 = 0x81, QETH_LINK_TYPE_LANE_TR = 0x82, QETH_LINK_TYPE_LANE_ETH1000 = 0x83, @@ -347,6 +348,7 @@ enum qeth_card_info_port_speed { CARD_INFO_PORTS_100M = 0x00000006, CARD_INFO_PORTS_1G = 0x00000007, CARD_INFO_PORTS_10G = 0x00000008, + CARD_INFO_PORTS_25G = 0x0000000A, }; /* (SET)DELIP(M) IPA stuff ***************************************************/ From 9e4028935cca3f9ef9b6a90df9da6f1f94853536 Mon Sep 17 00:00:00 2001 From: Vasily Averin <vvs@virtuozzo.com> Date: Sat, 3 Nov 2018 16:13:17 -0400 Subject: [PATCH 060/443] ext4: avoid potential extra brelse in setup_new_flex_group_blocks() Currently bh is set to NULL only during first iteration of for cycle, then this pointer is not cleared after end of using. Therefore rollback after errors can lead to extra brelse(bh) call, decrements bh counter and later trigger an unexpected warning in __brelse() Patch moves brelse() calls in body of cycle to exclude requirement of brelse() call in rollback. Fixes: 33afdcc5402d ("ext4: add a function which sets up group blocks ...") Signed-off-by: Vasily Averin <vvs@virtuozzo.com> Signed-off-by: Theodore Ts'o <tytso@mit.edu> Cc: stable@kernel.org # 3.3+ --- fs/ext4/resize.c | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/fs/ext4/resize.c b/fs/ext4/resize.c index ebbc663d07985..c3fa30878ca83 100644 --- a/fs/ext4/resize.c +++ b/fs/ext4/resize.c @@ -605,7 +605,6 @@ static int setup_new_flex_group_blocks(struct super_block *sb, bh = bclean(handle, sb, block); if (IS_ERR(bh)) { err = PTR_ERR(bh); - bh = NULL; goto out; } overhead = ext4_group_overhead_blocks(sb, group); @@ -618,9 +617,9 @@ static int setup_new_flex_group_blocks(struct super_block *sb, ext4_mark_bitmap_end(EXT4_B2C(sbi, group_data[i].blocks_count), sb->s_blocksize * 8, bh->b_data); err = ext4_handle_dirty_metadata(handle, NULL, bh); + brelse(bh); if (err) goto out; - brelse(bh); handle_ib: if (bg_flags[i] & EXT4_BG_INODE_UNINIT) @@ -635,18 +634,16 @@ static int setup_new_flex_group_blocks(struct super_block *sb, bh = bclean(handle, sb, block); if (IS_ERR(bh)) { err = PTR_ERR(bh); - bh = NULL; goto out; } ext4_mark_bitmap_end(EXT4_INODES_PER_GROUP(sb), sb->s_blocksize * 8, bh->b_data); err = ext4_handle_dirty_metadata(handle, NULL, bh); + brelse(bh); if (err) goto out; - brelse(bh); } - bh = NULL; /* Mark group tables in block bitmap */ for (j = 0; j < GROUP_TABLE_COUNT; j++) { @@ -685,7 +682,6 @@ static int setup_new_flex_group_blocks(struct super_block *sb, } out: - brelse(bh); err2 = ext4_journal_stop(handle); if (err2 && !err) err = err2; From cea5794122125bf67559906a0762186cf417099c Mon Sep 17 00:00:00 2001 From: Vasily Averin <vvs@virtuozzo.com> Date: Sat, 3 Nov 2018 16:22:10 -0400 Subject: [PATCH 061/443] ext4: add missing brelse() in set_flexbg_block_bitmap()'s error path Fixes: 33afdcc5402d ("ext4: add a function which sets up group blocks ...") Cc: stable@kernel.org # 3.3 Signed-off-by: Vasily Averin <vvs@virtuozzo.com> Signed-off-by: Theodore Ts'o <tytso@mit.edu> --- fs/ext4/resize.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/fs/ext4/resize.c b/fs/ext4/resize.c index c3fa30878ca83..0a4dc6217e783 100644 --- a/fs/ext4/resize.c +++ b/fs/ext4/resize.c @@ -459,16 +459,18 @@ static int set_flexbg_block_bitmap(struct super_block *sb, handle_t *handle, BUFFER_TRACE(bh, "get_write_access"); err = ext4_journal_get_write_access(handle, bh); - if (err) + if (err) { + brelse(bh); return err; + } ext4_debug("mark block bitmap %#04llx (+%llu/%u)\n", first_cluster, first_cluster - start, count2); ext4_set_bits(bh->b_data, first_cluster - start, count2); err = ext4_handle_dirty_metadata(handle, NULL, bh); + brelse(bh); if (unlikely(err)) return err; - brelse(bh); } return 0; From 61a9c11e5e7a0dab5381afa5d9d4dd5ebf18f7a0 Mon Sep 17 00:00:00 2001 From: Vasily Averin <vvs@virtuozzo.com> Date: Sat, 3 Nov 2018 16:50:08 -0400 Subject: [PATCH 062/443] ext4: add missing brelse() add_new_gdb_meta_bg()'s error path Fixes: 01f795f9e0d6 ("ext4: add online resizing support for meta_bg ...") Signed-off-by: Vasily Averin <vvs@virtuozzo.com> Signed-off-by: Theodore Ts'o <tytso@mit.edu> Cc: stable@kernel.org # 3.7 --- fs/ext4/resize.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/fs/ext4/resize.c b/fs/ext4/resize.c index 0a4dc6217e783..7131f35b62d9a 100644 --- a/fs/ext4/resize.c +++ b/fs/ext4/resize.c @@ -922,6 +922,7 @@ static int add_new_gdb_meta_bg(struct super_block *sb, sizeof(struct buffer_head *), GFP_NOFS); if (!n_group_desc) { + brelse(gdb_bh); err = -ENOMEM; ext4_warning(sb, "not enough memory for %lu groups", gdb_num + 1); @@ -937,8 +938,6 @@ static int add_new_gdb_meta_bg(struct super_block *sb, kvfree(o_group_desc); BUFFER_TRACE(gdb_bh, "get_write_access"); err = ext4_journal_get_write_access(handle, gdb_bh); - if (unlikely(err)) - brelse(gdb_bh); return err; } From ea0abbb648452cdb6e1734b702b6330a7448fcf8 Mon Sep 17 00:00:00 2001 From: Vasily Averin <vvs@virtuozzo.com> Date: Sat, 3 Nov 2018 17:11:19 -0400 Subject: [PATCH 063/443] ext4: add missing brelse() update_backups()'s error path Fixes: ac27a0ec112a ("ext4: initial copy of files from ext3") Signed-off-by: Vasily Averin <vvs@virtuozzo.com> Signed-off-by: Theodore Ts'o <tytso@mit.edu> Cc: stable@kernel.org # 2.6.19 --- fs/ext4/resize.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/fs/ext4/resize.c b/fs/ext4/resize.c index 7131f35b62d9a..3df326ee6d506 100644 --- a/fs/ext4/resize.c +++ b/fs/ext4/resize.c @@ -1121,8 +1121,10 @@ static void update_backups(struct super_block *sb, sector_t blk_off, char *data, backup_block, backup_block - ext4_group_first_block_no(sb, group)); BUFFER_TRACE(bh, "get_write_access"); - if ((err = ext4_journal_get_write_access(handle, bh))) + if ((err = ext4_journal_get_write_access(handle, bh))) { + brelse(bh); break; + } lock_buffer(bh); memcpy(bh->b_data, data, size); if (rest) From 3e59020abf0f88182730527ee5b862e786eb485a Mon Sep 17 00:00:00 2001 From: Eric Dumazet <edumazet@google.com> Date: Wed, 31 Oct 2018 08:39:12 -0700 Subject: [PATCH 064/443] net: bql: add __netdev_tx_sent_queue() When qdisc_run() tries to use BQL budget to bulk-dequeue a batch of packets, GSO can later transform this list in another list of skbs, and each skb is sent to device ndo_start_xmit(), one at a time, with skb->xmit_more being set to one but for last skb. Problem is that very often, BQL limit is hit in the middle of the packet train, forcing dev_hard_start_xmit() to stop the bulk send and requeue the end of the list. BQL role is to avoid head of line blocking, making sure a qdisc can deliver high priority packets before low priority ones. But there is no way requeued packets can be bypassed by fresh packets in the qdisc. Aborting the bulk send increases TX softirqs, and hot cache lines (after skb_segment()) are wasted. Note that for TSO packets, we never split a packet in the middle because of BQL limit being hit. Drivers should be able to update BQL counters without flipping/caring about BQL status, if the current skb has xmit_more set. Upper layers are ultimately responsible to stop sending another packet train when BQL limit is hit. Code template in a driver might look like the following : send_doorbell = __netdev_tx_sent_queue(tx_queue, nr_bytes, skb->xmit_more); Note that __netdev_tx_sent_queue() use is not mandatory, since following patch will change dev_hard_start_xmit() to not care about BQL status. But it is highly recommended so that xmit_more full benefits can be reached (less doorbells sent, and less atomic operations as well) Signed-off-by: Eric Dumazet <edumazet@google.com> Signed-off-by: David S. Miller <davem@davemloft.net> --- include/linux/netdevice.h | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index dc1d9ed33b319..857f8abf7b91b 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -3190,6 +3190,26 @@ static inline void netdev_tx_sent_queue(struct netdev_queue *dev_queue, #endif } +/* Variant of netdev_tx_sent_queue() for drivers that are aware + * that they should not test BQL status themselves. + * We do want to change __QUEUE_STATE_STACK_XOFF only for the last + * skb of a batch. + * Returns true if the doorbell must be used to kick the NIC. + */ +static inline bool __netdev_tx_sent_queue(struct netdev_queue *dev_queue, + unsigned int bytes, + bool xmit_more) +{ + if (xmit_more) { +#ifdef CONFIG_BQL + dql_queued(&dev_queue->dql, bytes); +#endif + return netif_tx_queue_stopped(dev_queue); + } + netdev_tx_sent_queue(dev_queue, bytes); + return true; +} + /** * netdev_sent_queue - report the number of bytes queued to hardware * @dev: network device From fe60faa5063822f2d555f4f326c7dd72a60929bf Mon Sep 17 00:00:00 2001 From: Eric Dumazet <edumazet@google.com> Date: Wed, 31 Oct 2018 08:39:13 -0700 Subject: [PATCH 065/443] net: do not abort bulk send on BQL status Before calling dev_hard_start_xmit(), upper layers tried to cook optimal skb list based on BQL budget. Problem is that GSO packets can end up comsuming more than the BQL budget. Breaking the loop is not useful, since requeued packets are ahead of any packets still in the qdisc. It is also more expensive, since next TX completion will push these packets later, while skbs are not in cpu caches. It is also a behavior difference with TSO packets, that can break the BQL limit by a large amount. Note that drivers should use __netdev_tx_sent_queue() in order to have optimal xmit_more support, and avoid useless atomic operations as shown in the following patch. Signed-off-by: Eric Dumazet <edumazet@google.com> Signed-off-by: David S. Miller <davem@davemloft.net> --- net/core/dev.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/core/dev.c b/net/core/dev.c index 77d43ae2a7bbe..0ffcbdd55fa9e 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -3272,7 +3272,7 @@ struct sk_buff *dev_hard_start_xmit(struct sk_buff *first, struct net_device *de } skb = next; - if (netif_xmit_stopped(txq) && skb) { + if (netif_tx_queue_stopped(txq) && skb) { rc = NETDEV_TX_BUSY; break; } From c29734443511aa3c53844e1941805fc761aa80ba Mon Sep 17 00:00:00 2001 From: Eric Dumazet <edumazet@google.com> Date: Wed, 31 Oct 2018 08:39:14 -0700 Subject: [PATCH 066/443] net/mlx4_en: use __netdev_tx_sent_queue() doorbell only depends on xmit_more and netif_tx_queue_stopped() Using __netdev_tx_sent_queue() avoids messing with BQL stop flag, and is more generic. This patch increases performance on GSO workload by keeping doorbells to the minimum required. Signed-off-by: Eric Dumazet <edumazet@google.com> Cc: Tariq Toukan <tariqt@mellanox.com> Reviewed-by: Tariq Toukan <tariqt@mellanox.com> Signed-off-by: David S. Miller <davem@davemloft.net> --- drivers/net/ethernet/mellanox/mlx4/en_tx.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx4/en_tx.c b/drivers/net/ethernet/mellanox/mlx4/en_tx.c index 1857ee0f0871d..6f5153afcab4d 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_tx.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_tx.c @@ -1006,7 +1006,6 @@ netdev_tx_t mlx4_en_xmit(struct sk_buff *skb, struct net_device *dev) ring->packets++; } ring->bytes += tx_info->nr_bytes; - netdev_tx_sent_queue(ring->tx_queue, tx_info->nr_bytes); AVG_PERF_COUNTER(priv->pstats.tx_pktsz_avg, skb->len); if (tx_info->inl) @@ -1044,7 +1043,10 @@ netdev_tx_t mlx4_en_xmit(struct sk_buff *skb, struct net_device *dev) netif_tx_stop_queue(ring->tx_queue); ring->queue_stopped++; } - send_doorbell = !skb->xmit_more || netif_xmit_stopped(ring->tx_queue); + + send_doorbell = __netdev_tx_sent_queue(ring->tx_queue, + tx_info->nr_bytes, + skb->xmit_more); real_size = (real_size / 16) & 0x3f; From e8ccbb7d2f53c62e14b889faaa3f6f809b657278 Mon Sep 17 00:00:00 2001 From: Yunsheng Lin <linyunsheng@huawei.com> Date: Fri, 2 Nov 2018 17:47:48 +0800 Subject: [PATCH 067/443] net: hns3: Fix for out-of-bounds access when setting pfc back pressure The vport should be initialized to hdev->vport for each bp group, otherwise it will cause out-of-bounds access and bp setting not correct problem. [ 35.254124] BUG: KASAN: slab-out-of-bounds in hclge_pause_setup_hw+0x2a0/0x3f8 [hclge] [ 35.254126] Read of size 2 at addr ffff803b6651581a by task kworker/0:1/14 [ 35.254132] CPU: 0 PID: 14 Comm: kworker/0:1 Not tainted 4.19.0-rc7-hulk+ #85 [ 35.254133] Hardware name: Huawei D06/D06, BIOS Hisilicon D06 UEFI RC0 - B052 (V0.52) 09/14/2018 [ 35.254141] Workqueue: events work_for_cpu_fn [ 35.254144] Call trace: [ 35.254147] dump_backtrace+0x0/0x2f0 [ 35.254149] show_stack+0x24/0x30 [ 35.254154] dump_stack+0x110/0x184 [ 35.254157] print_address_description+0x168/0x2b0 [ 35.254160] kasan_report+0x184/0x310 [ 35.254162] __asan_load2+0x7c/0xa0 [ 35.254170] hclge_pause_setup_hw+0x2a0/0x3f8 [hclge] [ 35.254177] hclge_tm_init_hw+0x794/0x9f0 [hclge] [ 35.254184] hclge_tm_schd_init+0x48/0x58 [hclge] [ 35.254191] hclge_init_ae_dev+0x778/0x1168 [hclge] [ 35.254196] hnae3_register_ae_dev+0x14c/0x298 [hnae3] [ 35.254206] hns3_probe+0x88/0xa8 [hns3] [ 35.254210] local_pci_probe+0x7c/0xf0 [ 35.254212] work_for_cpu_fn+0x34/0x50 [ 35.254214] process_one_work+0x4d4/0xa38 [ 35.254216] worker_thread+0x55c/0x8d8 [ 35.254219] kthread+0x1b0/0x1b8 [ 35.254222] ret_from_fork+0x10/0x1c [ 35.254224] The buggy address belongs to the page: [ 35.254228] page:ffff7e00ed994400 count:1 mapcount:0 mapping:0000000000000000 index:0x0 compound_mapcount: 0 [ 35.273835] flags: 0xfffff8000008000(head) [ 35.282007] raw: 0fffff8000008000 dead000000000100 dead000000000200 0000000000000000 [ 35.282010] raw: 0000000000000000 0000000000000000 00000001ffffffff 0000000000000000 [ 35.282012] page dumped because: kasan: bad access detected [ 35.282014] Memory state around the buggy address: [ 35.282017] ffff803b66515700: fe fe fe fe fe fe fe fe fe fe fe fe fe fe fe fe [ 35.282019] ffff803b66515780: fe fe fe fe fe fe fe fe fe fe fe fe fe fe fe fe [ 35.282021] >ffff803b66515800: fe fe fe fe fe fe fe fe fe fe fe fe fe fe fe fe [ 35.282022] ^ [ 35.282024] ffff803b66515880: fe fe fe fe fe fe fe fe fe fe fe fe fe fe fe fe [ 35.282026] ffff803b66515900: fe fe fe fe fe fe fe fe fe fe fe fe fe fe fe fe [ 35.282028] ================================================================== [ 35.282029] Disabling lock debugging due to kernel taint [ 35.282747] hclge driver initialization finished. Fixes: 67bf2541f4b9 ("net: hns3: Fixes the back pressure setting when sriov is enabled") Signed-off-by: Yunsheng Lin <linyunsheng@huawei.com> Signed-off-by: David S. Miller <davem@davemloft.net> --- drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.c | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.c index aa5cb9834d73a..494e562fe8c7e 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.c @@ -1168,14 +1168,14 @@ static int hclge_pfc_setup_hw(struct hclge_dev *hdev) */ static int hclge_bp_setup_hw(struct hclge_dev *hdev, u8 tc) { - struct hclge_vport *vport = hdev->vport; - u32 i, k, qs_bitmap; - int ret; + int i; for (i = 0; i < HCLGE_BP_GRP_NUM; i++) { - qs_bitmap = 0; + u32 qs_bitmap = 0; + int k, ret; for (k = 0; k < hdev->num_alloc_vport; k++) { + struct hclge_vport *vport = &hdev->vport[k]; u16 qs_id = vport->qs_offset + tc; u8 grp, sub_grp; @@ -1185,8 +1185,6 @@ static int hclge_bp_setup_hw(struct hclge_dev *hdev, u8 tc) HCLGE_BP_SUB_GRP_ID_S); if (i == grp) qs_bitmap |= (1 << sub_grp); - - vport++; } ret = hclge_tm_qs_bp_cfg(hdev, tc, i, qs_bitmap); From b987ffc18fb3b3b76b059aa9e372dbee26f7c4f2 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra <peterz@infradead.org> Date: Fri, 2 Nov 2018 14:26:53 +0100 Subject: [PATCH 068/443] x86/qspinlock: Fix compile error With a compiler that has asm-goto but not asm-cc-output and CONFIG_PROFILE_ALL_BRANCHES=y we get a compiler error: arch/x86/include/asm/rmwcc.h:23:17: error: jump into statement expression Fix this by writing the if() as a boolean multiplication instead. Reported-by: kbuild test robot <lkp@intel.com> Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org> Cc: Linus Torvalds <torvalds@linux-foundation.org> Cc: Peter Zijlstra <peterz@infradead.org> Cc: Thomas Gleixner <tglx@linutronix.de> Cc: Will Deacon <will.deacon@arm.com> Cc: linux-kernel@vger.kernel.org Fixes: 7aa54be29765 ("locking/qspinlock, x86: Provide liveness guarantee") Signed-off-by: Ingo Molnar <mingo@kernel.org> --- arch/x86/include/asm/qspinlock.h | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/arch/x86/include/asm/qspinlock.h b/arch/x86/include/asm/qspinlock.h index 87623c6b13db5..bd5ac6cc37db5 100644 --- a/arch/x86/include/asm/qspinlock.h +++ b/arch/x86/include/asm/qspinlock.h @@ -13,12 +13,15 @@ #define queued_fetch_set_pending_acquire queued_fetch_set_pending_acquire static __always_inline u32 queued_fetch_set_pending_acquire(struct qspinlock *lock) { - u32 val = 0; - - if (GEN_BINARY_RMWcc(LOCK_PREFIX "btsl", lock->val.counter, c, - "I", _Q_PENDING_OFFSET)) - val |= _Q_PENDING_VAL; + u32 val; + /* + * We can't use GEN_BINARY_RMWcc() inside an if() stmt because asm goto + * and CONFIG_PROFILE_ALL_BRANCHES=y results in a label inside a + * statement expression, which GCC doesn't like. + */ + val = GEN_BINARY_RMWcc(LOCK_PREFIX "btsl", lock->val.counter, c, + "I", _Q_PENDING_OFFSET) * _Q_PENDING_VAL; val |= atomic_read(&lock->val) & ~_Q_PENDING_MASK; return val; From 40fa3780bac2b654edf23f6b13f4e2dd550aea10 Mon Sep 17 00:00:00 2001 From: Valentin Schneider <valentin.schneider@arm.com> Date: Tue, 23 Oct 2018 14:37:31 +0100 Subject: [PATCH 069/443] sched/core: Take the hotplug lock in sched_init_smp() When running on linux-next (8c60c36d0b8c ("Add linux-next specific files for 20181019")) + CONFIG_PROVE_LOCKING=y on a big.LITTLE system (e.g. Juno or HiKey960), we get the following report: [ 0.748225] Call trace: [ 0.750685] lockdep_assert_cpus_held+0x30/0x40 [ 0.755236] static_key_enable_cpuslocked+0x20/0xc8 [ 0.760137] build_sched_domains+0x1034/0x1108 [ 0.764601] sched_init_domains+0x68/0x90 [ 0.768628] sched_init_smp+0x30/0x80 [ 0.772309] kernel_init_freeable+0x278/0x51c [ 0.776685] kernel_init+0x10/0x108 [ 0.780190] ret_from_fork+0x10/0x18 The static_key in question is 'sched_asym_cpucapacity' introduced by commit: df054e8445a4 ("sched/topology: Add static_key for asymmetric CPU capacity optimizations") In this particular case, we enable it because smp_prepare_cpus() will end up fetching the capacity-dmips-mhz entry from the devicetree, so we already have some asymmetry detected when entering sched_init_smp(). This didn't get detected in tip/sched/core because we were missing: commit cb538267ea1e ("jump_label/lockdep: Assert we hold the hotplug lock for _cpuslocked() operations") Calls to build_sched_domains() post sched_init_smp() will hold the hotplug lock, it just so happens that this very first call is a special case. As stated by a comment in sched_init_smp(), "There's no userspace yet to cause hotplug operations" so this is a harmless warning. However, to both respect the semantics of underlying callees and make lockdep happy, take the hotplug lock in sched_init_smp(). This also satisfies the comment atop sched_init_domains() that says "Callers must hold the hotplug lock". Reported-by: Sudeep Holla <sudeep.holla@arm.com> Tested-by: Sudeep Holla <sudeep.holla@arm.com> Signed-off-by: Valentin Schneider <valentin.schneider@arm.com> Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org> Cc: Dietmar.Eggemann@arm.com Cc: Linus Torvalds <torvalds@linux-foundation.org> Cc: Peter Zijlstra <peterz@infradead.org> Cc: Thomas Gleixner <tglx@linutronix.de> Cc: morten.rasmussen@arm.com Cc: quentin.perret@arm.com Link: http://lkml.kernel.org/r/1540301851-3048-1-git-send-email-valentin.schneider@arm.com Signed-off-by: Ingo Molnar <mingo@kernel.org> --- kernel/sched/core.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/kernel/sched/core.c b/kernel/sched/core.c index fd2fce8a001be..02a20ef196a65 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -5859,11 +5859,14 @@ void __init sched_init_smp(void) /* * There's no userspace yet to cause hotplug operations; hence all the * CPU masks are stable and all blatant races in the below code cannot - * happen. + * happen. The hotplug lock is nevertheless taken to satisfy lockdep, + * but there won't be any contention on it. */ + cpus_read_lock(); mutex_lock(&sched_domains_mutex); sched_init_domains(cpu_active_mask); mutex_unlock(&sched_domains_mutex); + cpus_read_unlock(); /* Move init over to a non-isolated CPU */ if (set_cpus_allowed_ptr(current, housekeeping_cpumask(HK_FLAG_DOMAIN)) < 0) From 92619210529a6b77b2cbedbadba3ff5eaa6e28ed Mon Sep 17 00:00:00 2001 From: Arnd Bergmann <arnd@arndb.de> Date: Fri, 2 Nov 2018 16:36:24 +0100 Subject: [PATCH 070/443] qed: fix link config error handling gcc-8 notices that qed_mcp_get_transceiver_data() may fail to return a result to the caller: drivers/net/ethernet/qlogic/qed/qed_mcp.c: In function 'qed_mcp_trans_speed_mask': drivers/net/ethernet/qlogic/qed/qed_mcp.c:1955:2: error: 'transceiver_type' may be used uninitialized in this function [-Werror=maybe-uninitialized] When an error is returned by qed_mcp_get_transceiver_data(), we should propagate that to the caller of qed_mcp_trans_speed_mask() rather than continuing with uninitialized data. Fixes: c56a8be7e7aa ("qed: Add supported link and advertise link to display in ethtool.") Signed-off-by: Arnd Bergmann <arnd@arndb.de> Signed-off-by: David S. Miller <davem@davemloft.net> --- drivers/net/ethernet/qlogic/qed/qed_mcp.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/qlogic/qed/qed_mcp.c b/drivers/net/ethernet/qlogic/qed/qed_mcp.c index f40f654398a07..a96364df43203 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_mcp.c +++ b/drivers/net/ethernet/qlogic/qed/qed_mcp.c @@ -1944,9 +1944,12 @@ int qed_mcp_trans_speed_mask(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt, u32 *p_speed_mask) { u32 transceiver_type, transceiver_state; + int ret; - qed_mcp_get_transceiver_data(p_hwfn, p_ptt, &transceiver_state, - &transceiver_type); + ret = qed_mcp_get_transceiver_data(p_hwfn, p_ptt, &transceiver_state, + &transceiver_type); + if (ret) + return ret; if (qed_is_transceiver_ready(transceiver_state, transceiver_type) == false) From a277d516de5f498c91d91189717ef7e01102ad27 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann <arnd@arndb.de> Date: Fri, 2 Nov 2018 16:36:55 +0100 Subject: [PATCH 071/443] openvswitch: fix linking without CONFIG_NF_CONNTRACK_LABELS When CONFIG_CC_OPTIMIZE_FOR_DEBUGGING is enabled, the compiler fails to optimize out a dead code path, which leads to a link failure: net/openvswitch/conntrack.o: In function `ovs_ct_set_labels': conntrack.c:(.text+0x2e60): undefined reference to `nf_connlabels_replace' In this configuration, we can take a shortcut, and completely remove the contrack label code. This may also help the regular optimization. Signed-off-by: Arnd Bergmann <arnd@arndb.de> Signed-off-by: David S. Miller <davem@davemloft.net> --- net/openvswitch/conntrack.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/openvswitch/conntrack.c b/net/openvswitch/conntrack.c index 6bec37ab44727..a4660c48ff014 100644 --- a/net/openvswitch/conntrack.c +++ b/net/openvswitch/conntrack.c @@ -1203,7 +1203,8 @@ static int ovs_ct_commit(struct net *net, struct sw_flow_key *key, &info->labels.mask); if (err) return err; - } else if (labels_nonzero(&info->labels.mask)) { + } else if (IS_ENABLED(CONFIG_NF_CONNTRACK_LABELS) && + labels_nonzero(&info->labels.mask)) { err = ovs_ct_set_labels(ct, key, &info->labels.value, &info->labels.mask); if (err) From 96801552f846460fe9ac10f1b189602992f004e1 Mon Sep 17 00:00:00 2001 From: Shalom Toledo <shalomt@mellanox.com> Date: Fri, 2 Nov 2018 19:49:15 +0000 Subject: [PATCH 072/443] mlxsw: spectrum: Fix IP2ME CPU policer configuration The CPU policer used to police packets being trapped via a local route (IP2ME) was incorrectly configured to police based on bytes per second instead of packets per second. Change the policer to police based on packets per second and avoid packet loss under certain circumstances. Fixes: 9148e7cf73ce ("mlxsw: spectrum: Add policers for trap groups") Signed-off-by: Shalom Toledo <shalomt@mellanox.com> Signed-off-by: Ido Schimmel <idosch@mellanox.com> Signed-off-by: David S. Miller <davem@davemloft.net> --- drivers/net/ethernet/mellanox/mlxsw/spectrum.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c index a2df12b79f8e9..9bec940330a45 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c @@ -3568,7 +3568,6 @@ static int mlxsw_sp_cpu_policers_set(struct mlxsw_core *mlxsw_core) burst_size = 7; break; case MLXSW_REG_HTGT_TRAP_GROUP_SP_IP2ME: - is_bytes = true; rate = 4 * 1024; burst_size = 4; break; From fd82d61ba142f0b83463e47064bf5460aac57b6e Mon Sep 17 00:00:00 2001 From: Xin Long <lucien.xin@gmail.com> Date: Sat, 3 Nov 2018 13:59:45 +0800 Subject: [PATCH 073/443] sctp: fix strchange_flags name for Stream Change Event As defined in rfc6525#section-6.1.3, SCTP_STREAM_CHANGE_DENIED and SCTP_STREAM_CHANGE_FAILED should be used instead of SCTP_ASSOC_CHANGE_DENIED and SCTP_ASSOC_CHANGE_FAILED. To keep the compatibility, fix it by adding two macros. Fixes: b444153fb5a6 ("sctp: add support for generating add stream change event notification") Reported-by: Jianwen Ji <jiji@redhat.com> Signed-off-by: Xin Long <lucien.xin@gmail.com> Signed-off-by: David S. Miller <davem@davemloft.net> --- include/uapi/linux/sctp.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/include/uapi/linux/sctp.h b/include/uapi/linux/sctp.h index 34dd3d497f2cc..680ecc3bf2a98 100644 --- a/include/uapi/linux/sctp.h +++ b/include/uapi/linux/sctp.h @@ -568,6 +568,8 @@ struct sctp_assoc_reset_event { #define SCTP_ASSOC_CHANGE_DENIED 0x0004 #define SCTP_ASSOC_CHANGE_FAILED 0x0008 +#define SCTP_STREAM_CHANGE_DENIED SCTP_ASSOC_CHANGE_DENIED +#define SCTP_STREAM_CHANGE_FAILED SCTP_ASSOC_CHANGE_FAILED struct sctp_stream_change_event { __u16 strchange_type; __u16 strchange_flags; From 12480e3b16982c4026de10dd8155823219cd6391 Mon Sep 17 00:00:00 2001 From: Xin Long <lucien.xin@gmail.com> Date: Sat, 3 Nov 2018 14:01:31 +0800 Subject: [PATCH 074/443] sctp: define SCTP_SS_DEFAULT for Stream schedulers According to rfc8260#section-4.3.2, SCTP_SS_DEFAULT is required to defined as SCTP_SS_FCFS or SCTP_SS_RR. SCTP_SS_FCFS is used for SCTP_SS_DEFAULT's value in this patch. Fixes: 5bbbbe32a431 ("sctp: introduce stream scheduler foundations") Reported-by: Jianwen Ji <jiji@redhat.com> Signed-off-by: Xin Long <lucien.xin@gmail.com> Signed-off-by: David S. Miller <davem@davemloft.net> --- include/uapi/linux/sctp.h | 1 + net/sctp/outqueue.c | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/include/uapi/linux/sctp.h b/include/uapi/linux/sctp.h index 680ecc3bf2a98..c81feb373d3ea 100644 --- a/include/uapi/linux/sctp.h +++ b/include/uapi/linux/sctp.h @@ -1153,6 +1153,7 @@ struct sctp_add_streams { /* SCTP Stream schedulers */ enum sctp_sched_type { SCTP_SS_FCFS, + SCTP_SS_DEFAULT = SCTP_SS_FCFS, SCTP_SS_PRIO, SCTP_SS_RR, SCTP_SS_MAX = SCTP_SS_RR diff --git a/net/sctp/outqueue.c b/net/sctp/outqueue.c index 9cb854b05342e..c37e1c2dec9d4 100644 --- a/net/sctp/outqueue.c +++ b/net/sctp/outqueue.c @@ -212,7 +212,7 @@ void sctp_outq_init(struct sctp_association *asoc, struct sctp_outq *q) INIT_LIST_HEAD(&q->retransmit); INIT_LIST_HEAD(&q->sacked); INIT_LIST_HEAD(&q->abandoned); - sctp_sched_set_sched(asoc, SCTP_SS_FCFS); + sctp_sched_set_sched(asoc, SCTP_SS_DEFAULT); } /* Free the outqueue structure and any related pending chunks. From 35b69a420bfb56b7b74cb635ea903db05e357bec Mon Sep 17 00:00:00 2001 From: Michael Kelley <mikelley@microsoft.com> Date: Sun, 4 Nov 2018 03:48:54 +0000 Subject: [PATCH 075/443] clockevents/drivers/i8253: Add support for PIT shutdown quirk Add support for platforms where pit_shutdown() doesn't work because of a quirk in the PIT emulation. On these platforms setting the counter register to zero causes the PIT to start running again, negating the shutdown. Provide a global variable that controls whether the counter register is zero'ed, which platform specific code can override. Signed-off-by: Michael Kelley <mikelley@microsoft.com> Signed-off-by: Thomas Gleixner <tglx@linutronix.de> Cc: "gregkh@linuxfoundation.org" <gregkh@linuxfoundation.org> Cc: "devel@linuxdriverproject.org" <devel@linuxdriverproject.org> Cc: "daniel.lezcano@linaro.org" <daniel.lezcano@linaro.org> Cc: "virtualization@lists.linux-foundation.org" <virtualization@lists.linux-foundation.org> Cc: "jgross@suse.com" <jgross@suse.com> Cc: "akataria@vmware.com" <akataria@vmware.com> Cc: "olaf@aepfle.de" <olaf@aepfle.de> Cc: "apw@canonical.com" <apw@canonical.com> Cc: vkuznets <vkuznets@redhat.com> Cc: "jasowang@redhat.com" <jasowang@redhat.com> Cc: "marcelo.cerri@canonical.com" <marcelo.cerri@canonical.com> Cc: KY Srinivasan <kys@microsoft.com> Cc: stable@vger.kernel.org Link: https://lkml.kernel.org/r/1541303219-11142-2-git-send-email-mikelley@microsoft.com --- drivers/clocksource/i8253.c | 14 ++++++++++++-- include/linux/i8253.h | 1 + 2 files changed, 13 insertions(+), 2 deletions(-) diff --git a/drivers/clocksource/i8253.c b/drivers/clocksource/i8253.c index 9c38895542f4a..d4350bb10b83a 100644 --- a/drivers/clocksource/i8253.c +++ b/drivers/clocksource/i8253.c @@ -20,6 +20,13 @@ DEFINE_RAW_SPINLOCK(i8253_lock); EXPORT_SYMBOL(i8253_lock); +/* + * Handle PIT quirk in pit_shutdown() where zeroing the counter register + * restarts the PIT, negating the shutdown. On platforms with the quirk, + * platform specific code can set this to false. + */ +bool i8253_clear_counter_on_shutdown __ro_after_init = true; + #ifdef CONFIG_CLKSRC_I8253 /* * Since the PIT overflows every tick, its not very useful @@ -109,8 +116,11 @@ static int pit_shutdown(struct clock_event_device *evt) raw_spin_lock(&i8253_lock); outb_p(0x30, PIT_MODE); - outb_p(0, PIT_CH0); - outb_p(0, PIT_CH0); + + if (i8253_clear_counter_on_shutdown) { + outb_p(0, PIT_CH0); + outb_p(0, PIT_CH0); + } raw_spin_unlock(&i8253_lock); return 0; diff --git a/include/linux/i8253.h b/include/linux/i8253.h index e6bb36a97519b..8336b2f6f8346 100644 --- a/include/linux/i8253.h +++ b/include/linux/i8253.h @@ -21,6 +21,7 @@ #define PIT_LATCH ((PIT_TICK_RATE + HZ/2) / HZ) extern raw_spinlock_t i8253_lock; +extern bool i8253_clear_counter_on_shutdown; extern struct clock_event_device i8253_clockevent; extern void clockevent_i8253_init(bool oneshot); From 1de72c706488b7be664a601cf3843bd01e327e58 Mon Sep 17 00:00:00 2001 From: Michael Kelley <mikelley@microsoft.com> Date: Sun, 4 Nov 2018 03:48:57 +0000 Subject: [PATCH 076/443] x86/hyper-v: Enable PIT shutdown quirk Hyper-V emulation of the PIT has a quirk such that the normal PIT shutdown path doesn't work, because clearing the counter register restarts the timer. Disable the counter clearing on PIT shutdown. Signed-off-by: Michael Kelley <mikelley@microsoft.com> Signed-off-by: Thomas Gleixner <tglx@linutronix.de> Cc: "gregkh@linuxfoundation.org" <gregkh@linuxfoundation.org> Cc: "devel@linuxdriverproject.org" <devel@linuxdriverproject.org> Cc: "daniel.lezcano@linaro.org" <daniel.lezcano@linaro.org> Cc: "virtualization@lists.linux-foundation.org" <virtualization@lists.linux-foundation.org> Cc: "jgross@suse.com" <jgross@suse.com> Cc: "akataria@vmware.com" <akataria@vmware.com> Cc: "olaf@aepfle.de" <olaf@aepfle.de> Cc: "apw@canonical.com" <apw@canonical.com> Cc: vkuznets <vkuznets@redhat.com> Cc: "jasowang@redhat.com" <jasowang@redhat.com> Cc: "marcelo.cerri@canonical.com" <marcelo.cerri@canonical.com> Cc: KY Srinivasan <kys@microsoft.com> Cc: stable@vger.kernel.org Link: https://lkml.kernel.org/r/1541303219-11142-3-git-send-email-mikelley@microsoft.com --- arch/x86/kernel/cpu/mshyperv.c | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/arch/x86/kernel/cpu/mshyperv.c b/arch/x86/kernel/cpu/mshyperv.c index 1c72f3819eb12..e81a2db42df7b 100644 --- a/arch/x86/kernel/cpu/mshyperv.c +++ b/arch/x86/kernel/cpu/mshyperv.c @@ -20,6 +20,7 @@ #include <linux/interrupt.h> #include <linux/irq.h> #include <linux/kexec.h> +#include <linux/i8253.h> #include <asm/processor.h> #include <asm/hypervisor.h> #include <asm/hyperv-tlfs.h> @@ -295,6 +296,16 @@ static void __init ms_hyperv_init_platform(void) if (efi_enabled(EFI_BOOT)) x86_platform.get_nmi_reason = hv_get_nmi_reason; + /* + * Hyper-V VMs have a PIT emulation quirk such that zeroing the + * counter register during PIT shutdown restarts the PIT. So it + * continues to interrupt @18.2 HZ. Setting i8253_clear_counter + * to false tells pit_shutdown() not to zero the counter so that + * the PIT really is shutdown. Generation 2 VMs don't have a PIT, + * and setting this value has no effect. + */ + i8253_clear_counter_on_shutdown = false; + #if IS_ENABLED(CONFIG_HYPERV) /* * Setup the hook to get control post apic initialization. From 74e3512731bd5c9673176425a76a7cc5efa8ddb6 Mon Sep 17 00:00:00 2001 From: Dmitry Osipenko <digetx@gmail.com> Date: Wed, 24 Oct 2018 22:37:13 +0300 Subject: [PATCH 077/443] hwmon: (core) Fix double-free in __hwmon_device_register() Fix double-free that happens when thermal zone setup fails, see KASAN log below. ================================================================== BUG: KASAN: double-free or invalid-free in __hwmon_device_register+0x5dc/0xa7c CPU: 0 PID: 132 Comm: kworker/0:2 Tainted: G B 4.19.0-rc8-next-20181016-00042-gb52cd80401e9-dirty #41 Hardware name: NVIDIA Tegra SoC (Flattened Device Tree) Workqueue: events deferred_probe_work_func Backtrace: [<c0110540>] (dump_backtrace) from [<c0110944>] (show_stack+0x20/0x24) [<c0110924>] (show_stack) from [<c105cb08>] (dump_stack+0x9c/0xb0) [<c105ca6c>] (dump_stack) from [<c02fdaec>] (print_address_description+0x68/0x250) [<c02fda84>] (print_address_description) from [<c02fd4ac>] (kasan_report_invalid_free+0x68/0x88) [<c02fd444>] (kasan_report_invalid_free) from [<c02fc85c>] (__kasan_slab_free+0x1f4/0x200) [<c02fc668>] (__kasan_slab_free) from [<c02fd0c0>] (kasan_slab_free+0x14/0x18) [<c02fd0ac>] (kasan_slab_free) from [<c02f9c6c>] (kfree+0x90/0x294) [<c02f9bdc>] (kfree) from [<c0b41bbc>] (__hwmon_device_register+0x5dc/0xa7c) [<c0b415e0>] (__hwmon_device_register) from [<c0b421e8>] (hwmon_device_register_with_info+0xa0/0xa8) [<c0b42148>] (hwmon_device_register_with_info) from [<c0b42324>] (devm_hwmon_device_register_with_info+0x74/0xb4) [<c0b422b0>] (devm_hwmon_device_register_with_info) from [<c0b4481c>] (lm90_probe+0x414/0x578) [<c0b44408>] (lm90_probe) from [<c0aeeff4>] (i2c_device_probe+0x35c/0x384) [<c0aeec98>] (i2c_device_probe) from [<c08776cc>] (really_probe+0x290/0x3e4) [<c087743c>] (really_probe) from [<c0877a2c>] (driver_probe_device+0x80/0x1c4) [<c08779ac>] (driver_probe_device) from [<c0877da8>] (__device_attach_driver+0x104/0x11c) [<c0877ca4>] (__device_attach_driver) from [<c0874dd8>] (bus_for_each_drv+0xa4/0xc8) [<c0874d34>] (bus_for_each_drv) from [<c08773b0>] (__device_attach+0xf0/0x15c) [<c08772c0>] (__device_attach) from [<c0877e24>] (device_initial_probe+0x1c/0x20) [<c0877e08>] (device_initial_probe) from [<c08762f4>] (bus_probe_device+0xdc/0xec) [<c0876218>] (bus_probe_device) from [<c0876a08>] (deferred_probe_work_func+0xa8/0xd4) [<c0876960>] (deferred_probe_work_func) from [<c01527c4>] (process_one_work+0x3dc/0x96c) [<c01523e8>] (process_one_work) from [<c01541e0>] (worker_thread+0x4ec/0x8bc) [<c0153cf4>] (worker_thread) from [<c015b238>] (kthread+0x230/0x240) [<c015b008>] (kthread) from [<c01010bc>] (ret_from_fork+0x14/0x38) Exception stack(0xcf743fb0 to 0xcf743ff8) 3fa0: 00000000 00000000 00000000 00000000 3fc0: 00000000 00000000 00000000 00000000 00000000 00000000 00000000 00000000 3fe0: 00000000 00000000 00000000 00000000 00000013 00000000 Allocated by task 132: kasan_kmalloc.part.1+0x58/0xf4 kasan_kmalloc+0x90/0xa4 kmem_cache_alloc_trace+0x90/0x2a0 __hwmon_device_register+0xbc/0xa7c hwmon_device_register_with_info+0xa0/0xa8 devm_hwmon_device_register_with_info+0x74/0xb4 lm90_probe+0x414/0x578 i2c_device_probe+0x35c/0x384 really_probe+0x290/0x3e4 driver_probe_device+0x80/0x1c4 __device_attach_driver+0x104/0x11c bus_for_each_drv+0xa4/0xc8 __device_attach+0xf0/0x15c device_initial_probe+0x1c/0x20 bus_probe_device+0xdc/0xec deferred_probe_work_func+0xa8/0xd4 process_one_work+0x3dc/0x96c worker_thread+0x4ec/0x8bc kthread+0x230/0x240 ret_from_fork+0x14/0x38 (null) Freed by task 132: __kasan_slab_free+0x12c/0x200 kasan_slab_free+0x14/0x18 kfree+0x90/0x294 hwmon_dev_release+0x1c/0x20 device_release+0x4c/0xe8 kobject_put+0xac/0x11c device_unregister+0x2c/0x30 __hwmon_device_register+0xa58/0xa7c hwmon_device_register_with_info+0xa0/0xa8 devm_hwmon_device_register_with_info+0x74/0xb4 lm90_probe+0x414/0x578 i2c_device_probe+0x35c/0x384 really_probe+0x290/0x3e4 driver_probe_device+0x80/0x1c4 __device_attach_driver+0x104/0x11c bus_for_each_drv+0xa4/0xc8 __device_attach+0xf0/0x15c device_initial_probe+0x1c/0x20 bus_probe_device+0xdc/0xec deferred_probe_work_func+0xa8/0xd4 process_one_work+0x3dc/0x96c worker_thread+0x4ec/0x8bc kthread+0x230/0x240 ret_from_fork+0x14/0x38 (null) Cc: <stable@vger.kernel.org> # v4.15+ Fixes: 47c332deb8e8 ("hwmon: Deal with errors from the thermal subsystem") Signed-off-by: Dmitry Osipenko <digetx@gmail.com> Signed-off-by: Guenter Roeck <linux@roeck-us.net> --- drivers/hwmon/hwmon.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/hwmon/hwmon.c b/drivers/hwmon/hwmon.c index 975c951698846..84f61cec6319c 100644 --- a/drivers/hwmon/hwmon.c +++ b/drivers/hwmon/hwmon.c @@ -649,8 +649,10 @@ __hwmon_device_register(struct device *dev, const char *name, void *drvdata, if (info[i]->config[j] & HWMON_T_INPUT) { err = hwmon_thermal_add_sensor(dev, hwdev, j); - if (err) - goto free_device; + if (err) { + device_unregister(hdev); + goto ida_remove; + } } } } @@ -658,8 +660,6 @@ __hwmon_device_register(struct device *dev, const char *name, void *drvdata, return hdev; -free_device: - device_unregister(hdev); free_hwmon: kfree(hwdev); ida_remove: From e3e61f01d755188cb6c2dcf5a244b9c0937c258e Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven <geert@linux-m68k.org> Date: Sun, 28 Oct 2018 18:16:51 +0100 Subject: [PATCH 078/443] hwmon: (ibmpowernv) Remove bogus __init annotations If gcc decides not to inline make_sensor_label(): WARNING: vmlinux.o(.text+0x4df549c): Section mismatch in reference from the function .create_device_attrs() to the function .init.text:.make_sensor_label() The function .create_device_attrs() references the function __init .make_sensor_label(). This is often because .create_device_attrs lacks a __init annotation or the annotation of .make_sensor_label is wrong. As .probe() can be called after freeing of __init memory, all __init annotiations in the driver are bogus, and should be removed. Signed-off-by: Geert Uytterhoeven <geert@linux-m68k.org> Signed-off-by: Guenter Roeck <linux@roeck-us.net> --- drivers/hwmon/ibmpowernv.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/drivers/hwmon/ibmpowernv.c b/drivers/hwmon/ibmpowernv.c index 0ccca87f52719..293dd1c6c7b36 100644 --- a/drivers/hwmon/ibmpowernv.c +++ b/drivers/hwmon/ibmpowernv.c @@ -181,7 +181,7 @@ static ssize_t show_label(struct device *dev, struct device_attribute *devattr, return sprintf(buf, "%s\n", sdata->label); } -static int __init get_logical_cpu(int hwcpu) +static int get_logical_cpu(int hwcpu) { int cpu; @@ -192,9 +192,8 @@ static int __init get_logical_cpu(int hwcpu) return -ENOENT; } -static void __init make_sensor_label(struct device_node *np, - struct sensor_data *sdata, - const char *label) +static void make_sensor_label(struct device_node *np, + struct sensor_data *sdata, const char *label) { u32 id; size_t n; From 0432e833191ad4d17b7fc2364941f91dad51db1a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Holger=20Hoffst=C3=A4tte?= <holger@applied-asynchrony.com> Date: Sun, 4 Nov 2018 19:02:42 +0100 Subject: [PATCH 079/443] net: phy: realtek: fix RTL8201F sysfs name MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Since 4.19 the following error in sysfs has appeared when using the r8169 NIC driver: $cd /sys/module/realtek/drivers $ls -l ls: cannot access 'mdio_bus:RTL8201F 10/100Mbps Ethernet': No such file or directory [..garbled dir entries follow..] Apparently the forward slash in "10/100Mbps Ethernet" is interpreted as directory separator that leads nowhere, and was introduced in commit 513588dd44b ("net: phy: realtek: add RTL8201F phy-id and functions"). Fix this by removing the offending slash in the driver name. Other drivers in net/phy seem to have the same problem, but I cannot test/verify them. Fixes: 513588dd44b ("net: phy: realtek: add RTL8201F phy-id and functions") Signed-off-by: Holger Hoffstätte <holger@applied-asynchrony.com> Reviewed-by: Andrew Lunn <andrew@lunn.ch> Signed-off-by: David S. Miller <davem@davemloft.net> --- drivers/net/phy/realtek.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/phy/realtek.c b/drivers/net/phy/realtek.c index 7fc8508b5231d..271e8adc39f10 100644 --- a/drivers/net/phy/realtek.c +++ b/drivers/net/phy/realtek.c @@ -220,7 +220,7 @@ static struct phy_driver realtek_drvs[] = { .flags = PHY_HAS_INTERRUPT, }, { .phy_id = 0x001cc816, - .name = "RTL8201F 10/100Mbps Ethernet", + .name = "RTL8201F Fast Ethernet", .phy_id_mask = 0x001fffff, .features = PHY_BASIC_FEATURES, .flags = PHY_HAS_INTERRUPT, From ea53abfab960909d622ca37bcfb8e1c5378d21cc Mon Sep 17 00:00:00 2001 From: Jarod Wilson <jarod@redhat.com> Date: Sun, 4 Nov 2018 14:59:46 -0500 Subject: [PATCH 080/443] bonding/802.3ad: fix link_failure_count tracking Commit 4d2c0cda07448ea6980f00102dc3964eb25e241c set slave->link to BOND_LINK_DOWN for 802.3ad bonds whenever invalid speed/duplex values were read, to fix a problem with slaves getting into weird states, but in the process, broke tracking of link failures, as going straight to BOND_LINK_DOWN when a link is indeed down (cable pulled, switch rebooted) means we broke out of bond_miimon_inspect()'s BOND_LINK_DOWN case because !link_state was already true, we never incremented commit, and never got a chance to call bond_miimon_commit(), where slave->link_failure_count would be incremented. I believe the simple fix here is to mark the slave as BOND_LINK_FAIL, and let bond_miimon_inspect() transition the link from _FAIL to either _UP or _DOWN, and in the latter case, we now get proper incrementing of link_failure_count again. Fixes: 4d2c0cda0744 ("bonding: speed/duplex update at NETDEV_UP event") CC: Mahesh Bandewar <maheshb@google.com> CC: David S. Miller <davem@davemloft.net> CC: netdev@vger.kernel.org CC: stable@vger.kernel.org Signed-off-by: Jarod Wilson <jarod@redhat.com> Signed-off-by: David S. Miller <davem@davemloft.net> --- drivers/net/bonding/bond_main.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c index ffa37adb76817..333387f1f1fe6 100644 --- a/drivers/net/bonding/bond_main.c +++ b/drivers/net/bonding/bond_main.c @@ -3112,13 +3112,13 @@ static int bond_slave_netdev_event(unsigned long event, case NETDEV_CHANGE: /* For 802.3ad mode only: * Getting invalid Speed/Duplex values here will put slave - * in weird state. So mark it as link-down for the time + * in weird state. So mark it as link-fail for the time * being and let link-monitoring (miimon) set it right when * correct speeds/duplex are available. */ if (bond_update_speed_duplex(slave) && BOND_MODE(bond) == BOND_MODE_8023AD) - slave->link = BOND_LINK_DOWN; + slave->link = BOND_LINK_FAIL; if (BOND_MODE(bond) == BOND_MODE_8023AD) bond_3ad_adapter_speed_duplex_changed(slave); From 438ad09af5581b7024850b5dbb6353c7f2f7d8a9 Mon Sep 17 00:00:00 2001 From: Oleksij Rempel <o.rempel@pengutronix.de> Date: Thu, 18 Oct 2018 13:26:13 +0200 Subject: [PATCH 081/443] ARM: dts: imx6sll: fix typo for fsl,imx6sll-i2c node MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fix the type of compatible string "fs,imx6sll-i2c" which should be "fsl,imx6sll-i2c". Signed-off-by: Oleksij Rempel <o.rempel@pengutronix.de> Acked-by: Uwe Kleine-König <u.kleine-koenig@pengutronix.de> Signed-off-by: Shawn Guo <shawnguo@kernel.org> --- arch/arm/boot/dts/imx6sll.dtsi | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/boot/dts/imx6sll.dtsi b/arch/arm/boot/dts/imx6sll.dtsi index ed9a980bce850..beefa1b2049d7 100644 --- a/arch/arm/boot/dts/imx6sll.dtsi +++ b/arch/arm/boot/dts/imx6sll.dtsi @@ -740,7 +740,7 @@ i2c1: i2c@21a0000 { #address-cells = <1>; #size-cells = <0>; - compatible = "fs,imx6sll-i2c", "fsl,imx21-i2c"; + compatible = "fsl,imx6sll-i2c", "fsl,imx21-i2c"; reg = <0x021a0000 0x4000>; interrupts = <GIC_SPI 36 IRQ_TYPE_LEVEL_HIGH>; clocks = <&clks IMX6SLL_CLK_I2C1>; From 1af6ab3bac8458fc2e92ad7bb97b62de4a1fddef Mon Sep 17 00:00:00 2001 From: Rob Herring <robh@kernel.org> Date: Tue, 30 Oct 2018 20:02:30 -0500 Subject: [PATCH 082/443] ARM: dts: fsl: Fix improperly quoted stdout-path values A quoted label reference doesn't expand to the node path and is taken as a literal string. Dropping the quotes can fix this unless the baudrate string is appended in which case we have to use the alias. At least on VF610, the problem was masked by setting the console in bootargs. Use the alias syntax with baudrate parameter so we can drop setting the console in bootargs. Cc: Shawn Guo <shawnguo@kernel.org> Cc: Sascha Hauer <s.hauer@pengutronix.de> Cc: Pengutronix Kernel Team <kernel@pengutronix.de> Cc: NXP Linux Team <linux-imx@nxp.com> Cc: Mark Rutland <mark.rutland@arm.com> Reviewed-by: Fabio Estevam <festevam@gmail.com> Reviewed-by: Stefan Agner <stefan@agner.ch> Signed-off-by: Rob Herring <robh@kernel.org> Signed-off-by: Shawn Guo <shawnguo@kernel.org> --- arch/arm/boot/dts/imx53-ppd.dts | 2 +- arch/arm/boot/dts/vf610m4-colibri.dts | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/arm/boot/dts/imx53-ppd.dts b/arch/arm/boot/dts/imx53-ppd.dts index b560ff88459bf..5ff9a179c83c3 100644 --- a/arch/arm/boot/dts/imx53-ppd.dts +++ b/arch/arm/boot/dts/imx53-ppd.dts @@ -55,7 +55,7 @@ }; chosen { - stdout-path = "&uart1:115200n8"; + stdout-path = "serial0:115200n8"; }; memory@70000000 { diff --git a/arch/arm/boot/dts/vf610m4-colibri.dts b/arch/arm/boot/dts/vf610m4-colibri.dts index 41ec66a969907..ca62495587602 100644 --- a/arch/arm/boot/dts/vf610m4-colibri.dts +++ b/arch/arm/boot/dts/vf610m4-colibri.dts @@ -50,8 +50,8 @@ compatible = "fsl,vf610m4"; chosen { - bootargs = "console=ttyLP2,115200 clk_ignore_unused init=/linuxrc rw"; - stdout-path = "&uart2"; + bootargs = "clk_ignore_unused init=/linuxrc rw"; + stdout-path = "serial2:115200"; }; memory@8c000000 { From 1ad9fb750a104f51851c092edd7b3553f0218428 Mon Sep 17 00:00:00 2001 From: Leonard Crestez <leonard.crestez@nxp.com> Date: Mon, 8 Oct 2018 15:28:01 +0000 Subject: [PATCH 083/443] ARM: dts: imx6sx-sdb: Fix enet phy regulator Bindings for "fixed-regulator" only explicitly support "gpio" property, not "gpios". Fix by correcting the property name. The enet PHYs on imx6sx-sdb needs to be explicitly reset after a power cycle, this can be handled by the phy-reset-gpios property. Sadly this is not handled on suspend: the fec driver turns phy-supply off but doesn't assert phy-reset-gpios again on resume. Since additional phy-level work is required to support powering off the phy in suspend fix the problem by just marking the regulator as "boot-on" "always-on" so that it's never turned off. This behavior is equivalent to older releases. Keep the phy-reset-gpios property on fec anyway because it is a correct description of board design. This issue was exposed by commit efdfeb079cc3 ("regulator: fixed: Convert to use GPIO descriptor only") which causes the "gpios" property to also be parsed. Before that commit the "gpios" property had no effect, PHY reset was only handled in the the bootloader. This fixes linux-next boot failures previously reported here: https://lore.kernel.org/patchwork/patch/982437/#1177900 https://lore.kernel.org/patchwork/patch/994091/#1178304 Signed-off-by: Leonard Crestez <leonard.crestez@nxp.com> Signed-off-by: Shawn Guo <shawnguo@kernel.org> --- arch/arm/boot/dts/imx6sx-sdb.dtsi | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/arch/arm/boot/dts/imx6sx-sdb.dtsi b/arch/arm/boot/dts/imx6sx-sdb.dtsi index 53b3408b5fab1..7d7d679945d28 100644 --- a/arch/arm/boot/dts/imx6sx-sdb.dtsi +++ b/arch/arm/boot/dts/imx6sx-sdb.dtsi @@ -117,7 +117,9 @@ regulator-name = "enet_3v3"; regulator-min-microvolt = <3300000>; regulator-max-microvolt = <3300000>; - gpios = <&gpio2 6 GPIO_ACTIVE_LOW>; + gpio = <&gpio2 6 GPIO_ACTIVE_LOW>; + regulator-boot-on; + regulator-always-on; }; reg_pcie_gpio: regulator-pcie-gpio { @@ -180,6 +182,7 @@ phy-supply = <®_enet_3v3>; phy-mode = "rgmii"; phy-handle = <ðphy1>; + phy-reset-gpios = <&gpio2 7 GPIO_ACTIVE_LOW>; status = "okay"; mdio { @@ -373,6 +376,8 @@ MX6SX_PAD_RGMII1_RD3__ENET1_RX_DATA_3 0x3081 MX6SX_PAD_RGMII1_RX_CTL__ENET1_RX_EN 0x3081 MX6SX_PAD_ENET2_RX_CLK__ENET2_REF_CLK_25M 0x91 + /* phy reset */ + MX6SX_PAD_ENET2_CRS__GPIO2_IO_7 0x10b0 >; }; From 3182215dd0b2120fb942ed88430cfb7c12d583e0 Mon Sep 17 00:00:00 2001 From: Alistair Popple <alistair@popple.id.au> Date: Tue, 30 Oct 2018 22:02:03 +1100 Subject: [PATCH 084/443] powerpc/powernv/npu: Remove NPU DMA ops The NPU IOMMU is setup to mirror the parent PCIe device IOMMU setup. Therefore it does not make sense to call dma operations such as dma_map_page(), etc. directly on these devices. The existing dma_ops simply print a warning if they are ever called, however this is unnecessary and the warnings are likely to go unnoticed. It is instead simpler to remove these operations and let the generic DMA code print warnings (eg. via a NULL pointer deref) in cases of buggy drivers attempting dma operations on NVLink devices. Signed-off-by: Alistair Popple <alistair@popple.id.au> Signed-off-by: Michael Ellerman <mpe@ellerman.id.au> --- arch/powerpc/platforms/powernv/npu-dma.c | 64 ++---------------------- 1 file changed, 4 insertions(+), 60 deletions(-) diff --git a/arch/powerpc/platforms/powernv/npu-dma.c b/arch/powerpc/platforms/powernv/npu-dma.c index 6f60e09319223..75b9352529818 100644 --- a/arch/powerpc/platforms/powernv/npu-dma.c +++ b/arch/powerpc/platforms/powernv/npu-dma.c @@ -102,63 +102,6 @@ struct pci_dev *pnv_pci_get_npu_dev(struct pci_dev *gpdev, int index) } EXPORT_SYMBOL(pnv_pci_get_npu_dev); -#define NPU_DMA_OP_UNSUPPORTED() \ - dev_err_once(dev, "%s operation unsupported for NVLink devices\n", \ - __func__) - -static void *dma_npu_alloc(struct device *dev, size_t size, - dma_addr_t *dma_handle, gfp_t flag, - unsigned long attrs) -{ - NPU_DMA_OP_UNSUPPORTED(); - return NULL; -} - -static void dma_npu_free(struct device *dev, size_t size, - void *vaddr, dma_addr_t dma_handle, - unsigned long attrs) -{ - NPU_DMA_OP_UNSUPPORTED(); -} - -static dma_addr_t dma_npu_map_page(struct device *dev, struct page *page, - unsigned long offset, size_t size, - enum dma_data_direction direction, - unsigned long attrs) -{ - NPU_DMA_OP_UNSUPPORTED(); - return 0; -} - -static int dma_npu_map_sg(struct device *dev, struct scatterlist *sglist, - int nelems, enum dma_data_direction direction, - unsigned long attrs) -{ - NPU_DMA_OP_UNSUPPORTED(); - return 0; -} - -static int dma_npu_dma_supported(struct device *dev, u64 mask) -{ - NPU_DMA_OP_UNSUPPORTED(); - return 0; -} - -static u64 dma_npu_get_required_mask(struct device *dev) -{ - NPU_DMA_OP_UNSUPPORTED(); - return 0; -} - -static const struct dma_map_ops dma_npu_ops = { - .map_page = dma_npu_map_page, - .map_sg = dma_npu_map_sg, - .alloc = dma_npu_alloc, - .free = dma_npu_free, - .dma_supported = dma_npu_dma_supported, - .get_required_mask = dma_npu_get_required_mask, -}; - /* * Returns the PE assoicated with the PCI device of the given * NPU. Returns the linked pci device if pci_dev != NULL. @@ -270,10 +213,11 @@ static void pnv_npu_dma_set_32(struct pnv_ioda_pe *npe) rc = pnv_npu_set_window(npe, 0, gpe->table_group.tables[0]); /* - * We don't initialise npu_pe->tce32_table as we always use - * dma_npu_ops which are nops. + * NVLink devices use the same TCE table configuration as + * their parent device so drivers shouldn't be doing DMA + * operations directly on these devices. */ - set_dma_ops(&npe->pdev->dev, &dma_npu_ops); + set_dma_ops(&npe->pdev->dev, NULL); } /* From e1ff516a56ad56c476b47795d3811eef79d25fbe Mon Sep 17 00:00:00 2001 From: Yi Wang <wang.yi59@zte.com.cn> Date: Mon, 5 Nov 2018 08:50:13 +0800 Subject: [PATCH 085/443] sched/fair: Fix a comment in task_numa_fault() Duplicated 'case it'. Signed-off-by: Yi Wang <wang.yi59@zte.com.cn> Reviewed-by: Xi Xu <xu.xi8@zte.com.cn> Cc: Linus Torvalds <torvalds@linux-foundation.org> Cc: Peter Zijlstra <peterz@infradead.org> Cc: Thomas Gleixner <tglx@linutronix.de> Cc: zhong.weidong@zte.com.cn Link: http://lkml.kernel.org/r/1541379013-11352-1-git-send-email-wang.yi59@zte.com.cn Signed-off-by: Ingo Molnar <mingo@kernel.org> --- kernel/sched/fair.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index ee271bb661cc9..3648d0300fdf9 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -2400,8 +2400,8 @@ void task_numa_fault(int last_cpupid, int mem_node, int pages, int flags) local = 1; /* - * Retry task to preferred node migration periodically, in case it - * case it previously failed, or the scheduler moved us. + * Retry to migrate task to preferred node periodically, in case it + * previously failed, or the scheduler moved us. */ if (time_after(jiffies, p->numa_migrate_retry)) { task_numa_placement(p); From f75d651587f719a813ebbbfeee570e6570731d55 Mon Sep 17 00:00:00 2001 From: Randy Dunlap <rdunlap@infradead.org> Date: Sun, 4 Nov 2018 18:40:14 -0800 Subject: [PATCH 086/443] resource/docs: Fix new kernel-doc warnings The first group of warnings is caused by a "/**" kernel-doc notation marker but the function comments are not in kernel-doc format. Also add another error return value here. ../kernel/resource.c:337: warning: Function parameter or member 'start' not described in 'find_next_iomem_res' ../kernel/resource.c:337: warning: Function parameter or member 'end' not described in 'find_next_iomem_res' ../kernel/resource.c:337: warning: Function parameter or member 'flags' not described in 'find_next_iomem_res' ../kernel/resource.c:337: warning: Function parameter or member 'desc' not described in 'find_next_iomem_res' ../kernel/resource.c:337: warning: Function parameter or member 'first_lvl' not described in 'find_next_iomem_res' ../kernel/resource.c:337: warning: Function parameter or member 'res' not described in 'find_next_iomem_res' Add the missing function parameter documentation for the other warnings: ../kernel/resource.c:409: warning: Function parameter or member 'arg' not described in 'walk_iomem_res_desc' ../kernel/resource.c:409: warning: Function parameter or member 'func' not described in 'walk_iomem_res_desc' Signed-off-by: Randy Dunlap <rdunlap@infradead.org> Cc: Andrew Morton <akpm@linux-foundation.org> Cc: Borislav Petkov <bp@suse.de> Cc: Linus Torvalds <torvalds@linux-foundation.org> Cc: Peter Zijlstra <peterz@infradead.org> Cc: Thomas Gleixner <tglx@linutronix.de> Fixes: b69c2e20f6e4 ("resource: Clean it up a bit") Link: http://lkml.kernel.org/r/dda2e4d8-bedd-3167-20fe-8c7d2d35b354@infradead.org Signed-off-by: Ingo Molnar <mingo@kernel.org> --- kernel/resource.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/kernel/resource.c b/kernel/resource.c index b3a3a1fc499ea..17bcb189d5306 100644 --- a/kernel/resource.c +++ b/kernel/resource.c @@ -318,14 +318,14 @@ int release_resource(struct resource *old) EXPORT_SYMBOL(release_resource); -/** +/* * Finds the lowest iomem resource that covers part of [start..end]. The * caller must specify start, end, flags, and desc (which may be * IORES_DESC_NONE). * * If a resource is found, returns 0 and *res is overwritten with the part * of the resource that's within [start..end]; if none is found, returns - * -1. + * -1. Returns -EINVAL for other invalid parameters. * * This function walks the whole tree and not just first level children * unless @first_lvl is true. @@ -390,7 +390,9 @@ static int __walk_iomem_res_desc(resource_size_t start, resource_size_t end, } /** - * Walks through iomem resources and calls func() with matching resource + * walk_iomem_res_desc - walk through iomem resources + * + * Walks through iomem resources and calls @func() with matching resource * ranges. This walks through whole tree and not just first level children. * All the memory ranges which overlap start,end and also match flags and * desc are valid candidates. @@ -399,6 +401,8 @@ static int __walk_iomem_res_desc(resource_size_t start, resource_size_t end, * @flags: I/O resource flags * @start: start addr * @end: end addr + * @arg: function argument for the callback @func + * @func: callback function that is called for each qualifying resource area * * NOTE: For a new descriptor search, define a new IORES_DESC in * <linux/ioport.h> and set it in 'desc' of a target resource entry. From b068621a53f92f42400581d69ad0e84c56620b0a Mon Sep 17 00:00:00 2001 From: Randy Dunlap <rdunlap@infradead.org> Date: Sun, 4 Nov 2018 14:03:56 -0800 Subject: [PATCH 087/443] Documentation/x86: Fix typo in zero-page.txt Signed-off-by: Randy Dunlap <rdunlap@infradead.org> Cc: Jonathan Corbet <corbet@lwn.net> Cc: Linus Torvalds <torvalds@linux-foundation.org> Cc: Peter Zijlstra <peterz@infradead.org> Cc: Thomas Gleixner <tglx@linutronix.de> Cc: linux-doc@vger.kernel.org Link: http://lkml.kernel.org/r/f259b21b-1f2b-f215-00d2-23388bed2530@infradead.org Signed-off-by: Ingo Molnar <mingo@kernel.org> --- Documentation/x86/zero-page.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Documentation/x86/zero-page.txt b/Documentation/x86/zero-page.txt index 97b7adbceda48..68aed077f7b62 100644 --- a/Documentation/x86/zero-page.txt +++ b/Documentation/x86/zero-page.txt @@ -25,7 +25,7 @@ Offset Proto Name Meaning 0C8/004 ALL ext_cmd_line_ptr cmd_line_ptr high 32bits 140/080 ALL edid_info Video mode setup (struct edid_info) 1C0/020 ALL efi_info EFI 32 information (struct efi_info) -1E0/004 ALL alk_mem_k Alternative mem check, in KB +1E0/004 ALL alt_mem_k Alternative mem check, in KB 1E4/004 ALL scratch Scratch field for the kernel setup code 1E8/001 ALL e820_entries Number of entries in e820_table (below) 1E9/001 ALL eddbuf_entries Number of entries in eddbuf (below) From 8727b230f665cadb9349a915c60e6abb18fb083c Mon Sep 17 00:00:00 2001 From: Dan Carpenter <dan.carpenter@oracle.com> Date: Sat, 13 Oct 2018 13:21:40 +0300 Subject: [PATCH 088/443] drm/exynos: checking for NULL instead of IS_ERR() The of_drm_find_panel() function returns error pointers and never NULL but we the driver assumes that ->panel is NULL when it's not present. Fixes: 6afb7721e2a0 ("drm/exynos: move connector creation to attach callback") Signed-off-by: Dan Carpenter <dan.carpenter@oracle.com> Signed-off-by: Inki Dae <inki.dae@samsung.com> --- drivers/gpu/drm/exynos/exynos_drm_dsi.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/exynos/exynos_drm_dsi.c b/drivers/gpu/drm/exynos/exynos_drm_dsi.c index 07af7758066db..32f2567497890 100644 --- a/drivers/gpu/drm/exynos/exynos_drm_dsi.c +++ b/drivers/gpu/drm/exynos/exynos_drm_dsi.c @@ -1527,7 +1527,9 @@ static int exynos_dsi_host_attach(struct mipi_dsi_host *host, } dsi->panel = of_drm_find_panel(device->dev.of_node); - if (dsi->panel) { + if (IS_ERR(dsi->panel)) { + dsi->panel = NULL; + } else { drm_panel_attach(dsi->panel, &dsi->connector); dsi->connector.status = connector_status_connected; } From 6ca469e22a30992b4478d2ab88737c70667c1e00 Mon Sep 17 00:00:00 2001 From: Inki Dae <inki.dae@samsung.com> Date: Fri, 5 Oct 2018 11:50:20 +0900 Subject: [PATCH 089/443] Revert "drm/exynos/decon5433: implement frame counter" This reverts commit 0586feba322e1de05075700eb4b835c8b683e62b This patch makes it to need get_vblank_counter callback in crtc to get frame counter from decon driver. However, drm_dev->max_vblank_count is a member unique to vendor's DRM driver but in case of ARM DRM, some CRTC devices don't provide the frame counter value. As a result, this patch made extension and clone mode not working. Instead of this patch, we may need separated max_vblank_count which belongs to each CRTC device, or need to implement frame counter emulation for them who don't support HW frame counter. Signed-off-by: Inki Dae <inki.dae@samsung.com> --- drivers/gpu/drm/exynos/exynos5433_drm_decon.c | 9 --------- drivers/gpu/drm/exynos/exynos_drm_crtc.c | 11 ----------- drivers/gpu/drm/exynos/exynos_drm_drv.h | 1 - 3 files changed, 21 deletions(-) diff --git a/drivers/gpu/drm/exynos/exynos5433_drm_decon.c b/drivers/gpu/drm/exynos/exynos5433_drm_decon.c index 94529aa823392..aef487dd87315 100644 --- a/drivers/gpu/drm/exynos/exynos5433_drm_decon.c +++ b/drivers/gpu/drm/exynos/exynos5433_drm_decon.c @@ -164,13 +164,6 @@ static u32 decon_get_frame_count(struct decon_context *ctx, bool end) return frm; } -static u32 decon_get_vblank_counter(struct exynos_drm_crtc *crtc) -{ - struct decon_context *ctx = crtc->ctx; - - return decon_get_frame_count(ctx, false); -} - static void decon_setup_trigger(struct decon_context *ctx) { if (!ctx->crtc->i80_mode && !(ctx->out_type & I80_HW_TRG)) @@ -536,7 +529,6 @@ static const struct exynos_drm_crtc_ops decon_crtc_ops = { .disable = decon_disable, .enable_vblank = decon_enable_vblank, .disable_vblank = decon_disable_vblank, - .get_vblank_counter = decon_get_vblank_counter, .atomic_begin = decon_atomic_begin, .update_plane = decon_update_plane, .disable_plane = decon_disable_plane, @@ -554,7 +546,6 @@ static int decon_bind(struct device *dev, struct device *master, void *data) int ret; ctx->drm_dev = drm_dev; - drm_dev->max_vblank_count = 0xffffffff; for (win = ctx->first_win; win < WINDOWS_NR; win++) { ctx->configs[win].pixel_formats = decon_formats; diff --git a/drivers/gpu/drm/exynos/exynos_drm_crtc.c b/drivers/gpu/drm/exynos/exynos_drm_crtc.c index eea90251808fa..2696289ecc78f 100644 --- a/drivers/gpu/drm/exynos/exynos_drm_crtc.c +++ b/drivers/gpu/drm/exynos/exynos_drm_crtc.c @@ -162,16 +162,6 @@ static void exynos_drm_crtc_disable_vblank(struct drm_crtc *crtc) exynos_crtc->ops->disable_vblank(exynos_crtc); } -static u32 exynos_drm_crtc_get_vblank_counter(struct drm_crtc *crtc) -{ - struct exynos_drm_crtc *exynos_crtc = to_exynos_crtc(crtc); - - if (exynos_crtc->ops->get_vblank_counter) - return exynos_crtc->ops->get_vblank_counter(exynos_crtc); - - return 0; -} - static const struct drm_crtc_funcs exynos_crtc_funcs = { .set_config = drm_atomic_helper_set_config, .page_flip = drm_atomic_helper_page_flip, @@ -181,7 +171,6 @@ static const struct drm_crtc_funcs exynos_crtc_funcs = { .atomic_destroy_state = drm_atomic_helper_crtc_destroy_state, .enable_vblank = exynos_drm_crtc_enable_vblank, .disable_vblank = exynos_drm_crtc_disable_vblank, - .get_vblank_counter = exynos_drm_crtc_get_vblank_counter, }; struct exynos_drm_crtc *exynos_drm_crtc_create(struct drm_device *drm_dev, diff --git a/drivers/gpu/drm/exynos/exynos_drm_drv.h b/drivers/gpu/drm/exynos/exynos_drm_drv.h index ec9604f1272b5..5e61e707f9555 100644 --- a/drivers/gpu/drm/exynos/exynos_drm_drv.h +++ b/drivers/gpu/drm/exynos/exynos_drm_drv.h @@ -135,7 +135,6 @@ struct exynos_drm_crtc_ops { void (*disable)(struct exynos_drm_crtc *crtc); int (*enable_vblank)(struct exynos_drm_crtc *crtc); void (*disable_vblank)(struct exynos_drm_crtc *crtc); - u32 (*get_vblank_counter)(struct exynos_drm_crtc *crtc); enum drm_mode_status (*mode_valid)(struct exynos_drm_crtc *crtc, const struct drm_display_mode *mode); bool (*mode_fixup)(struct exynos_drm_crtc *crtc, From deee3284cba3145a4ee03cbe8541d7a3bfc499e2 Mon Sep 17 00:00:00 2001 From: Andrzej Hajda <a.hajda@samsung.com> Date: Fri, 26 Oct 2018 12:40:03 +0200 Subject: [PATCH 090/443] drm/exynos/dsi: register connector if it is created after drm bind DSI device can be attached after DRM device is registered. In such case newly created connector must be registered by exynos_dsi. The patch fixes exynos_drm on rinato and trats boards. Fixes: 6afb7721e2a0 ("drm/exynos: move connector creation to attach callback") Reported-by: Marek Szyprowski <m.szyprowski@samsung.com> Signed-off-by: Andrzej Hajda <a.hajda@samsung.com> Tested-by: Marek Szyprowski <m.szyprowski@samsung.com> Signed-off-by: Inki Dae <inki.dae@samsung.com> --- drivers/gpu/drm/exynos/exynos_drm_dsi.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/exynos/exynos_drm_dsi.c b/drivers/gpu/drm/exynos/exynos_drm_dsi.c index 32f2567497890..d81e62ae286ae 100644 --- a/drivers/gpu/drm/exynos/exynos_drm_dsi.c +++ b/drivers/gpu/drm/exynos/exynos_drm_dsi.c @@ -14,6 +14,7 @@ #include <drm/drmP.h> #include <drm/drm_crtc_helper.h> +#include <drm/drm_fb_helper.h> #include <drm/drm_mipi_dsi.h> #include <drm/drm_panel.h> #include <drm/drm_atomic_helper.h> @@ -1474,12 +1475,12 @@ static int exynos_dsi_create_connector(struct drm_encoder *encoder) { struct exynos_dsi *dsi = encoder_to_dsi(encoder); struct drm_connector *connector = &dsi->connector; + struct drm_device *drm = encoder->dev; int ret; connector->polled = DRM_CONNECTOR_POLL_HPD; - ret = drm_connector_init(encoder->dev, connector, - &exynos_dsi_connector_funcs, + ret = drm_connector_init(drm, connector, &exynos_dsi_connector_funcs, DRM_MODE_CONNECTOR_DSI); if (ret) { DRM_ERROR("Failed to initialize connector with drm\n"); @@ -1489,7 +1490,12 @@ static int exynos_dsi_create_connector(struct drm_encoder *encoder) connector->status = connector_status_disconnected; drm_connector_helper_add(connector, &exynos_dsi_connector_helper_funcs); drm_connector_attach_encoder(connector, encoder); + if (!drm->registered) + return 0; + connector->funcs->reset(connector); + drm_fb_helper_add_one_connector(drm->fb_helper, connector); + drm_connector_register(connector); return 0; } From 989534cfcac89f927fa46b1e0861d92ffbd2c7e4 Mon Sep 17 00:00:00 2001 From: Andrzej Hajda <a.hajda@samsung.com> Date: Fri, 26 Oct 2018 12:13:28 +0200 Subject: [PATCH 091/443] drm/exynos/fbdev: do not skip fbdev init if there are no connectors Since connectors can be created dynamically, fbdev should be initialized even if there are no connectors at the moment. Otherwise fbdev will not be created even after connector's appearance. The patch fixes lack of fbdev on rinato and trats boards. Fixes: 6afb7721e2a0 ("drm/exynos: move connector creation to attach callback") Reported-by: Marek Szyprowski <m.szyprowski@samsung.com> Signed-off-by: Andrzej Hajda <a.hajda@samsung.com> Tested-by: Marek Szyprowski <m.szyprowski@samsung.com> Signed-off-by: Inki Dae <inki.dae@samsung.com> --- drivers/gpu/drm/exynos/exynos_drm_fbdev.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/exynos/exynos_drm_fbdev.c b/drivers/gpu/drm/exynos/exynos_drm_fbdev.c index 918dd2c822098..01d182289efa3 100644 --- a/drivers/gpu/drm/exynos/exynos_drm_fbdev.c +++ b/drivers/gpu/drm/exynos/exynos_drm_fbdev.c @@ -192,7 +192,7 @@ int exynos_drm_fbdev_init(struct drm_device *dev) struct drm_fb_helper *helper; int ret; - if (!dev->mode_config.num_crtc || !dev->mode_config.num_connector) + if (!dev->mode_config.num_crtc) return 0; fbdev = kzalloc(sizeof(*fbdev), GFP_KERNEL); From be2e1c9dcf76886a83fb1c433a316e26d4ca2550 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann <arnd@arndb.de> Date: Thu, 11 Oct 2018 13:06:16 +0200 Subject: [PATCH 092/443] mtd: docg3: don't set conflicting BCH_CONST_PARAMS option I noticed during the creation of another bugfix that the BCH_CONST_PARAMS option that is set by DOCG3 breaks setting variable parameters for any other users of the BCH library code. The only other user we have today is the MTD_NAND software BCH implementation (most flash controllers use hardware BCH these days and are not affected). I considered removing BCH_CONST_PARAMS entirely because of the inherent conflict, but according to the description in lib/bch.c there is a significant performance benefit in keeping it. To avoid the immediate problem of the conflict between MTD_NAND_BCH and DOCG3, this only sets the constant parameters if MTD_NAND_BCH is disabled, which should fix the problem for all cases that are affected. This should also work for all stable kernels. Note that there is only one machine that actually seems to use the DOCG3 driver (arch/arm/mach-pxa/mioa701.c), so most users should have the driver disabled, but it almost certainly shows up if we wanted to test random kernels on machines that use software BCH in MTD. Fixes: d13d19ece39f ("mtd: docg3: add ECC correction code") Cc: stable@vger.kernel.org Cc: Robert Jarzmik <robert.jarzmik@free.fr> Signed-off-by: Arnd Bergmann <arnd@arndb.de> Signed-off-by: Boris Brezillon <boris.brezillon@bootlin.com> --- drivers/mtd/devices/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/mtd/devices/Kconfig b/drivers/mtd/devices/Kconfig index e514d57a0419d..aa983422aa970 100644 --- a/drivers/mtd/devices/Kconfig +++ b/drivers/mtd/devices/Kconfig @@ -207,7 +207,7 @@ comment "Disk-On-Chip Device Drivers" config MTD_DOCG3 tristate "M-Systems Disk-On-Chip G3" select BCH - select BCH_CONST_PARAMS + select BCH_CONST_PARAMS if !MTD_NAND_BCH select BITREVERSE help This provides an MTD device driver for the M-Systems DiskOnChip From ce97e2bb6687d3af675ecf2e836a9122def53578 Mon Sep 17 00:00:00 2001 From: Randy Dunlap <rdunlap@infradead.org> Date: Sun, 4 Nov 2018 18:38:35 -0800 Subject: [PATCH 093/443] mtd: nand: drop kernel-doc notation for a deleted function parameter Remove kernel-doc notation for a deleted function parameter to prevent a kernel-doc warning: ../drivers/mtd/nand/raw/nand_base.c:603: warning: Excess function parameter 'mtd' description in 'panic_nand_wait' Fixes: f1d46942e823 ("mtd: rawnand: Pass a nand_chip object to chip->waitfunc()") Signed-off-by: Randy Dunlap <rdunlap@infradead.org> Cc: Boris Brezillon <boris.brezillon@bootlin.com> Cc: Miquel Raynal <miquel.raynal@bootlin.com> Cc: Richard Weinberger <richard@nod.at> Reviewed-by: Miquel Raynal <miquel.raynal@bootlin.com> Signed-off-by: Boris Brezillon <boris.brezillon@bootlin.com> --- drivers/mtd/nand/raw/nand_base.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/mtd/nand/raw/nand_base.c b/drivers/mtd/nand/raw/nand_base.c index 05bd0779fe9bf..71050a0b31dfe 100644 --- a/drivers/mtd/nand/raw/nand_base.c +++ b/drivers/mtd/nand/raw/nand_base.c @@ -590,7 +590,6 @@ nand_get_device(struct mtd_info *mtd, int new_state) /** * panic_nand_wait - [GENERIC] wait until the command is done - * @mtd: MTD device structure * @chip: NAND chip structure * @timeo: timeout * From d098093ba06eb032057d1aca1c2e45889e099d00 Mon Sep 17 00:00:00 2001 From: Boris Brezillon <boris.brezillon@bootlin.com> Date: Sun, 28 Oct 2018 12:29:55 +0100 Subject: [PATCH 094/443] mtd: nand: Fix nanddev_neraseblocks() nanddev_neraseblocks() currently returns the number pages per LUN instead of the total number of eraseblocks. Fixes: 9c3736a3de21 ("mtd: nand: Add core infrastructure to deal with NAND devices") Cc: <stable@vger.kernel.org> Signed-off-by: Boris Brezillon <boris.brezillon@bootlin.com> Reviewed-by: Miquel Raynal <miquel.raynal@bootlin.com> --- include/linux/mtd/nand.h | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/include/linux/mtd/nand.h b/include/linux/mtd/nand.h index abe975c87b900..78b86dea2f294 100644 --- a/include/linux/mtd/nand.h +++ b/include/linux/mtd/nand.h @@ -324,9 +324,8 @@ static inline unsigned int nanddev_ntargets(const struct nand_device *nand) */ static inline unsigned int nanddev_neraseblocks(const struct nand_device *nand) { - return (u64)nand->memorg.luns_per_target * - nand->memorg.eraseblocks_per_lun * - nand->memorg.pages_per_eraseblock; + return nand->memorg.ntargets * nand->memorg.luns_per_target * + nand->memorg.eraseblocks_per_lun; } /** From e39f9dd8206ad66992ac0e6218ef1ba746f2cce9 Mon Sep 17 00:00:00 2001 From: Jerome Brunet <jbrunet@baylibre.com> Date: Tue, 23 Oct 2018 18:03:19 +0200 Subject: [PATCH 095/443] pinctrl: meson: fix pinconf bias disable If a bias is enabled on a pin of an Amlogic SoC, calling .pin_config_set() with PIN_CONFIG_BIAS_DISABLE will not disable the bias. Instead it will force a pull-down bias on the pin. Instead of the pull type register bank, the driver should access the pull enable register bank. Fixes: 6ac730951104 ("pinctrl: add driver for Amlogic Meson SoCs") Signed-off-by: Jerome Brunet <jbrunet@baylibre.com> Acked-by: Neil Armstrong <narmstrong@baylibre.com> Signed-off-by: Linus Walleij <linus.walleij@linaro.org> --- drivers/pinctrl/meson/pinctrl-meson.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/pinctrl/meson/pinctrl-meson.c b/drivers/pinctrl/meson/pinctrl-meson.c index f8b778a7d4717..53d449076dee3 100644 --- a/drivers/pinctrl/meson/pinctrl-meson.c +++ b/drivers/pinctrl/meson/pinctrl-meson.c @@ -192,7 +192,7 @@ static int meson_pinconf_set(struct pinctrl_dev *pcdev, unsigned int pin, dev_dbg(pc->dev, "pin %u: disable bias\n", pin); meson_calc_reg_and_bit(bank, pin, REG_PULL, ®, &bit); - ret = regmap_update_bits(pc->reg_pull, reg, + ret = regmap_update_bits(pc->reg_pullen, reg, BIT(bit), 0); if (ret) return ret; From 4bc51e1e350cd4707ce6e551a93eae26d40b9889 Mon Sep 17 00:00:00 2001 From: Jerome Brunet <jbrunet@baylibre.com> Date: Mon, 29 Oct 2018 16:13:37 +0100 Subject: [PATCH 096/443] pinctrl: meson: fix gxbb ao pull register bits AO pull register definition is inverted between pull (up/down) and pull enable. Fixing this allows to properly apply bias setting through pinconf Fixes: 468c234f9ed7 ("pinctrl: amlogic: Add support for Amlogic Meson GXBB SoC") Signed-off-by: Jerome Brunet <jbrunet@baylibre.com> Signed-off-by: Linus Walleij <linus.walleij@linaro.org> --- drivers/pinctrl/meson/pinctrl-meson-gxbb.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/pinctrl/meson/pinctrl-meson-gxbb.c b/drivers/pinctrl/meson/pinctrl-meson-gxbb.c index 4ceb06f8a33c9..4edeb4cae72aa 100644 --- a/drivers/pinctrl/meson/pinctrl-meson-gxbb.c +++ b/drivers/pinctrl/meson/pinctrl-meson-gxbb.c @@ -830,7 +830,7 @@ static struct meson_bank meson_gxbb_periphs_banks[] = { static struct meson_bank meson_gxbb_aobus_banks[] = { /* name first last irq pullen pull dir out in */ - BANK("AO", GPIOAO_0, GPIOAO_13, 0, 13, 0, 0, 0, 16, 0, 0, 0, 16, 1, 0), + BANK("AO", GPIOAO_0, GPIOAO_13, 0, 13, 0, 16, 0, 0, 0, 0, 0, 16, 1, 0), }; static struct meson_pinctrl_data meson_gxbb_periphs_pinctrl_data = { From ed3a2b74f3eb34c84c8377353f4730f05acdfd05 Mon Sep 17 00:00:00 2001 From: Jerome Brunet <jbrunet@baylibre.com> Date: Mon, 29 Oct 2018 16:13:38 +0100 Subject: [PATCH 097/443] pinctrl: meson: fix gxl ao pull register bits AO pull register definition is inverted between pull (up/down) and pull enable. Fixing this allows to properly apply bias setting through pinconf Fixes: 0f15f500ff2c ("pinctrl: meson: Add GXL pinctrl definitions") Signed-off-by: Jerome Brunet <jbrunet@baylibre.com> Signed-off-by: Linus Walleij <linus.walleij@linaro.org> --- drivers/pinctrl/meson/pinctrl-meson-gxl.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/pinctrl/meson/pinctrl-meson-gxl.c b/drivers/pinctrl/meson/pinctrl-meson-gxl.c index 7dae1d7bf6b0a..158f618f16957 100644 --- a/drivers/pinctrl/meson/pinctrl-meson-gxl.c +++ b/drivers/pinctrl/meson/pinctrl-meson-gxl.c @@ -807,7 +807,7 @@ static struct meson_bank meson_gxl_periphs_banks[] = { static struct meson_bank meson_gxl_aobus_banks[] = { /* name first last irq pullen pull dir out in */ - BANK("AO", GPIOAO_0, GPIOAO_9, 0, 9, 0, 0, 0, 16, 0, 0, 0, 16, 1, 0), + BANK("AO", GPIOAO_0, GPIOAO_9, 0, 9, 0, 16, 0, 0, 0, 0, 0, 16, 1, 0), }; static struct meson_pinctrl_data meson_gxl_periphs_pinctrl_data = { From e91b162d2868672d06010f34aa83d408db13d3c6 Mon Sep 17 00:00:00 2001 From: Jerome Brunet <jbrunet@baylibre.com> Date: Mon, 29 Oct 2018 16:13:39 +0100 Subject: [PATCH 098/443] pinctrl: meson: fix meson8 ao pull register bits AO pull register definition is inverted between pull (up/down) and pull enable. Fixing this allows to properly apply bias setting through pinconf Fixes: 6ac730951104 ("pinctrl: add driver for Amlogic Meson SoCs") Signed-off-by: Jerome Brunet <jbrunet@baylibre.com> Signed-off-by: Linus Walleij <linus.walleij@linaro.org> --- drivers/pinctrl/meson/pinctrl-meson8.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/pinctrl/meson/pinctrl-meson8.c b/drivers/pinctrl/meson/pinctrl-meson8.c index c6d79315218fa..86466173114da 100644 --- a/drivers/pinctrl/meson/pinctrl-meson8.c +++ b/drivers/pinctrl/meson/pinctrl-meson8.c @@ -1053,7 +1053,7 @@ static struct meson_bank meson8_cbus_banks[] = { static struct meson_bank meson8_aobus_banks[] = { /* name first last irq pullen pull dir out in */ - BANK("AO", GPIOAO_0, GPIO_TEST_N, 0, 13, 0, 0, 0, 16, 0, 0, 0, 16, 1, 0), + BANK("AO", GPIOAO_0, GPIO_TEST_N, 0, 13, 0, 16, 0, 0, 0, 0, 0, 16, 1, 0), }; static struct meson_pinctrl_data meson8_cbus_pinctrl_data = { From a1705f02704cd8a24d434bfd0141ee8142ad277a Mon Sep 17 00:00:00 2001 From: Jerome Brunet <jbrunet@baylibre.com> Date: Mon, 29 Oct 2018 16:13:40 +0100 Subject: [PATCH 099/443] pinctrl: meson: fix meson8b ao pull register bits AO pull register definition is inverted between pull (up/down) and pull enable. Fixing this allows to properly apply bias setting through pinconf Fixes: 0fefcb6876d0 ("pinctrl: Add support for Meson8b") Signed-off-by: Jerome Brunet <jbrunet@baylibre.com> Signed-off-by: Linus Walleij <linus.walleij@linaro.org> --- drivers/pinctrl/meson/pinctrl-meson8b.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/pinctrl/meson/pinctrl-meson8b.c b/drivers/pinctrl/meson/pinctrl-meson8b.c index bb2a30964fc69..647ad15d5c3c4 100644 --- a/drivers/pinctrl/meson/pinctrl-meson8b.c +++ b/drivers/pinctrl/meson/pinctrl-meson8b.c @@ -906,7 +906,7 @@ static struct meson_bank meson8b_cbus_banks[] = { static struct meson_bank meson8b_aobus_banks[] = { /* name first lastc irq pullen pull dir out in */ - BANK("AO", GPIOAO_0, GPIO_TEST_N, 0, 13, 0, 0, 0, 16, 0, 0, 0, 16, 1, 0), + BANK("AO", GPIOAO_0, GPIO_TEST_N, 0, 13, 0, 16, 0, 0, 0, 0, 0, 16, 1, 0), }; static struct meson_pinctrl_data meson8b_cbus_pinctrl_data = { From 4920b1f7676d55dcebdf55f2b0431a328acb4abe Mon Sep 17 00:00:00 2001 From: Punit Agrawal <punit.agrawal@arm.com> Date: Fri, 2 Nov 2018 16:57:52 +0000 Subject: [PATCH 100/443] mailmap: Update email for Punit Agrawal As I'll no longer be working with Arm, add a mailmap entry so any mail directed towards me reaches the appropriate mailbox. Acked-by: Will Deacon <will.deacon@arm.com> Signed-off-by: Punit Agrawal <punit.agrawal@arm.com> Signed-off-by: Catalin Marinas <catalin.marinas@arm.com> --- .mailmap | 1 + 1 file changed, 1 insertion(+) diff --git a/.mailmap b/.mailmap index a76be45fef6ca..28fecafa65069 100644 --- a/.mailmap +++ b/.mailmap @@ -159,6 +159,7 @@ Peter Oruba <peter@oruba.de> Peter Oruba <peter.oruba@amd.com> Pratyush Anand <pratyush.anand@gmail.com> <pratyush.anand@st.com> Praveen BP <praveenbp@ti.com> +Punit Agrawal <punitagrawal@gmail.com> <punit.agrawal@arm.com> Qais Yousef <qsyousef@gmail.com> <qais.yousef@imgtec.com> Oleksij Rempel <linux@rempel-privat.de> <bug-track@fisher-privat.net> Oleksij Rempel <linux@rempel-privat.de> <external.Oleksij.Rempel@de.bosch.com> From aab7a2414ba0d5c3d0571a90031b535adba7146a Mon Sep 17 00:00:00 2001 From: Kuninori Morimoto <kuninori.morimoto.gx@renesas.com> Date: Fri, 28 Sep 2018 02:38:36 +0000 Subject: [PATCH 101/443] arm64: dts: renesas: r8a7795: add missing dma-names on hscif2 hscif2 has 4 dmas, but has only 2 dma-names. This patch add missing dma-names. Signed-off-by: Kuninori Morimoto <kuninori.morimoto.gx@renesas.com> Reviewed-by: Geert Uytterhoeven <geert+renesas@glider.be> Fixes: e0f0bda79337701a ("arm64: dts: renesas: r8a7795: sort subnodes of the soc node") Signed-off-by: Simon Horman <horms+renesas@verge.net.au> --- arch/arm64/boot/dts/renesas/r8a7795.dtsi | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/boot/dts/renesas/r8a7795.dtsi b/arch/arm64/boot/dts/renesas/r8a7795.dtsi index b5f2273caca4d..a79c8d369e0b4 100644 --- a/arch/arm64/boot/dts/renesas/r8a7795.dtsi +++ b/arch/arm64/boot/dts/renesas/r8a7795.dtsi @@ -652,7 +652,7 @@ clock-names = "fck", "brg_int", "scif_clk"; dmas = <&dmac1 0x35>, <&dmac1 0x34>, <&dmac2 0x35>, <&dmac2 0x34>; - dma-names = "tx", "rx"; + dma-names = "tx", "rx", "tx", "rx"; power-domains = <&sysc R8A7795_PD_ALWAYS_ON>; resets = <&cpg 518>; status = "disabled"; From 058ad7b6aa5204d3af878415c7b946748ab34f7a Mon Sep 17 00:00:00 2001 From: Fabrizio Castro <fabrizio.castro@bp.renesas.com> Date: Thu, 4 Oct 2018 09:53:10 +0100 Subject: [PATCH 102/443] dt-bindings: arm: Fix RZ/G2E part number Fix RZ/G2E part number from its description. Signed-off-by: Fabrizio Castro <fabrizio.castro@bp.renesas.com> Reviewed-by: Biju Das <biju.das@bp.renesas.com> Reviewed-by: Rob Herring <robh@kernel.org> Signed-off-by: Simon Horman <horms+renesas@verge.net.au> --- Documentation/devicetree/bindings/arm/shmobile.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Documentation/devicetree/bindings/arm/shmobile.txt b/Documentation/devicetree/bindings/arm/shmobile.txt index f5e0f82fd5031..58c4256d37a39 100644 --- a/Documentation/devicetree/bindings/arm/shmobile.txt +++ b/Documentation/devicetree/bindings/arm/shmobile.txt @@ -27,7 +27,7 @@ SoCs: compatible = "renesas,r8a77470" - RZ/G2M (R8A774A1) compatible = "renesas,r8a774a1" - - RZ/G2E (RA8774C0) + - RZ/G2E (R8A774C0) compatible = "renesas,r8a774c0" - R-Car M1A (R8A77781) compatible = "renesas,r8a7778" From eab53fdfd60a84b0cc514d4f1f5d79226c76df01 Mon Sep 17 00:00:00 2001 From: Sergei Shtylyov <sergei.shtylyov@cogentembedded.com> Date: Thu, 18 Oct 2018 19:48:53 +0300 Subject: [PATCH 103/443] arm64: dts: renesas: condor: switch from EtherAVB to GEther The "official" Condor boards have always been wired to mount NFS via GEther, not EtherAVB -- the boards resoldered for EtherAVB were local to Cogent Embedded, so we've been having an unpleasant situation where a "normal" Condor board still can't mount NFS (unless an EtherAVB PHY extension board is plugged in). Switch from EtherAVB to GEther at last! Fixes: 8091788f3d38 ("arm64: dts: renesas: condor: add EtherAVB support") Signed-off-by: Sergei Shtylyov <sergei.shtylyov@cogentembedded.com> Signed-off-by: Simon Horman <horms+renesas@verge.net.au> --- .../boot/dts/renesas/r8a77980-condor.dts | 47 ++++++++++--------- 1 file changed, 24 insertions(+), 23 deletions(-) diff --git a/arch/arm64/boot/dts/renesas/r8a77980-condor.dts b/arch/arm64/boot/dts/renesas/r8a77980-condor.dts index fe2e2c051cc93..5a7012be0d6ad 100644 --- a/arch/arm64/boot/dts/renesas/r8a77980-condor.dts +++ b/arch/arm64/boot/dts/renesas/r8a77980-condor.dts @@ -15,7 +15,7 @@ aliases { serial0 = &scif0; - ethernet0 = &avb; + ethernet0 = &gether; }; chosen { @@ -97,23 +97,6 @@ }; }; -&avb { - pinctrl-0 = <&avb_pins>; - pinctrl-names = "default"; - - phy-mode = "rgmii-id"; - phy-handle = <&phy0>; - renesas,no-ether-link; - status = "okay"; - - phy0: ethernet-phy@0 { - rxc-skew-ps = <1500>; - reg = <0>; - interrupt-parent = <&gpio1>; - interrupts = <17 IRQ_TYPE_LEVEL_LOW>; - }; -}; - &canfd { pinctrl-0 = <&canfd0_pins>; pinctrl-names = "default"; @@ -139,6 +122,23 @@ clock-frequency = <32768>; }; +&gether { + pinctrl-0 = <&gether_pins>; + pinctrl-names = "default"; + + phy-mode = "rgmii-id"; + phy-handle = <&phy0>; + renesas,no-ether-link; + status = "okay"; + + phy0: ethernet-phy@0 { + rxc-skew-ps = <1500>; + reg = <0>; + interrupt-parent = <&gpio4>; + interrupts = <23 IRQ_TYPE_LEVEL_LOW>; + }; +}; + &i2c0 { pinctrl-0 = <&i2c0_pins>; pinctrl-names = "default"; @@ -236,16 +236,17 @@ }; &pfc { - avb_pins: avb { - groups = "avb_mdio", "avb_rgmii"; - function = "avb"; - }; - canfd0_pins: canfd0 { groups = "canfd0_data_a"; function = "canfd0"; }; + gether_pins: gether { + groups = "gether_mdio_a", "gether_rgmii", + "gether_txcrefclk", "gether_txcrefclk_mega"; + function = "gether"; + }; + i2c0_pins: i2c0 { groups = "i2c0"; function = "i2c0"; From 02522ad77fb7619615720147dc5da18024cad577 Mon Sep 17 00:00:00 2001 From: Heiko Carstens <heiko.carstens@de.ibm.com> Date: Fri, 2 Nov 2018 13:09:08 +0100 Subject: [PATCH 104/443] s390: update defconfigs Signed-off-by: Heiko Carstens <heiko.carstens@de.ibm.com> Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com> --- arch/s390/configs/debug_defconfig | 14 ++++- arch/s390/configs/performance_defconfig | 13 +++- arch/s390/defconfig | 79 +++++++++++++------------ 3 files changed, 63 insertions(+), 43 deletions(-) diff --git a/arch/s390/configs/debug_defconfig b/arch/s390/configs/debug_defconfig index 259d1698ac50a..c69cb04b7a594 100644 --- a/arch/s390/configs/debug_defconfig +++ b/arch/s390/configs/debug_defconfig @@ -64,6 +64,8 @@ CONFIG_NUMA=y CONFIG_PREEMPT=y CONFIG_HZ_100=y CONFIG_KEXEC_FILE=y +CONFIG_EXPOLINE=y +CONFIG_EXPOLINE_AUTO=y CONFIG_MEMORY_HOTPLUG=y CONFIG_MEMORY_HOTREMOVE=y CONFIG_KSM=y @@ -84,9 +86,11 @@ CONFIG_PCI_DEBUG=y CONFIG_HOTPLUG_PCI=y CONFIG_HOTPLUG_PCI_S390=y CONFIG_CHSC_SCH=y +CONFIG_VFIO_AP=m CONFIG_CRASH_DUMP=y CONFIG_BINFMT_MISC=m CONFIG_HIBERNATION=y +CONFIG_PM_DEBUG=y CONFIG_NET=y CONFIG_PACKET=y CONFIG_PACKET_DIAG=m @@ -161,8 +165,6 @@ CONFIG_NF_CONNTRACK_TFTP=m CONFIG_NF_CT_NETLINK=m CONFIG_NF_CT_NETLINK_TIMEOUT=m CONFIG_NF_TABLES=m -CONFIG_NFT_EXTHDR=m -CONFIG_NFT_META=m CONFIG_NFT_CT=m CONFIG_NFT_COUNTER=m CONFIG_NFT_LOG=m @@ -365,6 +367,8 @@ CONFIG_NET_ACT_SKBEDIT=m CONFIG_NET_ACT_CSUM=m CONFIG_DNS_RESOLVER=y CONFIG_OPENVSWITCH=m +CONFIG_VSOCKETS=m +CONFIG_VIRTIO_VSOCKETS=m CONFIG_NETLINK_DIAG=m CONFIG_CGROUP_NET_PRIO=y CONFIG_BPF_JIT=y @@ -461,6 +465,7 @@ CONFIG_PPTP=m CONFIG_PPPOL2TP=m CONFIG_PPP_ASYNC=m CONFIG_PPP_SYNC_TTY=m +CONFIG_ISM=m CONFIG_INPUT_EVDEV=y # CONFIG_INPUT_KEYBOARD is not set # CONFIG_INPUT_MOUSE is not set @@ -486,9 +491,12 @@ CONFIG_MLX4_INFINIBAND=m CONFIG_MLX5_INFINIBAND=m CONFIG_VFIO=m CONFIG_VFIO_PCI=m +CONFIG_VFIO_MDEV=m +CONFIG_VFIO_MDEV_DEVICE=m CONFIG_VIRTIO_PCI=m CONFIG_VIRTIO_BALLOON=m CONFIG_VIRTIO_INPUT=y +CONFIG_S390_AP_IOMMU=y CONFIG_EXT4_FS=y CONFIG_EXT4_FS_POSIX_ACL=y CONFIG_EXT4_FS_SECURITY=y @@ -615,7 +623,6 @@ CONFIG_DEBUG_CREDENTIALS=y CONFIG_RCU_TORTURE_TEST=m CONFIG_RCU_CPU_STALL_TIMEOUT=300 CONFIG_NOTIFIER_ERROR_INJECTION=m -CONFIG_PM_NOTIFIER_ERROR_INJECT=m CONFIG_NETDEV_NOTIFIER_ERROR_INJECT=m CONFIG_FAULT_INJECTION=y CONFIG_FAILSLAB=y @@ -727,3 +734,4 @@ CONFIG_APPLDATA_BASE=y CONFIG_KVM=m CONFIG_KVM_S390_UCONTROL=y CONFIG_VHOST_NET=m +CONFIG_VHOST_VSOCK=m diff --git a/arch/s390/configs/performance_defconfig b/arch/s390/configs/performance_defconfig index 37fd60c20e22d..32f539dc9c192 100644 --- a/arch/s390/configs/performance_defconfig +++ b/arch/s390/configs/performance_defconfig @@ -65,6 +65,8 @@ CONFIG_NR_CPUS=512 CONFIG_NUMA=y CONFIG_HZ_100=y CONFIG_KEXEC_FILE=y +CONFIG_EXPOLINE=y +CONFIG_EXPOLINE_AUTO=y CONFIG_MEMORY_HOTPLUG=y CONFIG_MEMORY_HOTREMOVE=y CONFIG_KSM=y @@ -82,9 +84,11 @@ CONFIG_PCI=y CONFIG_HOTPLUG_PCI=y CONFIG_HOTPLUG_PCI_S390=y CONFIG_CHSC_SCH=y +CONFIG_VFIO_AP=m CONFIG_CRASH_DUMP=y CONFIG_BINFMT_MISC=m CONFIG_HIBERNATION=y +CONFIG_PM_DEBUG=y CONFIG_NET=y CONFIG_PACKET=y CONFIG_PACKET_DIAG=m @@ -159,8 +163,6 @@ CONFIG_NF_CONNTRACK_TFTP=m CONFIG_NF_CT_NETLINK=m CONFIG_NF_CT_NETLINK_TIMEOUT=m CONFIG_NF_TABLES=m -CONFIG_NFT_EXTHDR=m -CONFIG_NFT_META=m CONFIG_NFT_CT=m CONFIG_NFT_COUNTER=m CONFIG_NFT_LOG=m @@ -362,6 +364,8 @@ CONFIG_NET_ACT_SKBEDIT=m CONFIG_NET_ACT_CSUM=m CONFIG_DNS_RESOLVER=y CONFIG_OPENVSWITCH=m +CONFIG_VSOCKETS=m +CONFIG_VIRTIO_VSOCKETS=m CONFIG_NETLINK_DIAG=m CONFIG_CGROUP_NET_PRIO=y CONFIG_BPF_JIT=y @@ -458,6 +462,7 @@ CONFIG_PPTP=m CONFIG_PPPOL2TP=m CONFIG_PPP_ASYNC=m CONFIG_PPP_SYNC_TTY=m +CONFIG_ISM=m CONFIG_INPUT_EVDEV=y # CONFIG_INPUT_KEYBOARD is not set # CONFIG_INPUT_MOUSE is not set @@ -483,9 +488,12 @@ CONFIG_MLX4_INFINIBAND=m CONFIG_MLX5_INFINIBAND=m CONFIG_VFIO=m CONFIG_VFIO_PCI=m +CONFIG_VFIO_MDEV=m +CONFIG_VFIO_MDEV_DEVICE=m CONFIG_VIRTIO_PCI=m CONFIG_VIRTIO_BALLOON=m CONFIG_VIRTIO_INPUT=y +CONFIG_S390_AP_IOMMU=y CONFIG_EXT4_FS=y CONFIG_EXT4_FS_POSIX_ACL=y CONFIG_EXT4_FS_SECURITY=y @@ -666,3 +674,4 @@ CONFIG_APPLDATA_BASE=y CONFIG_KVM=m CONFIG_KVM_S390_UCONTROL=y CONFIG_VHOST_NET=m +CONFIG_VHOST_VSOCK=m diff --git a/arch/s390/defconfig b/arch/s390/defconfig index 7cb6a52f727da..4d58a92b5d979 100644 --- a/arch/s390/defconfig +++ b/arch/s390/defconfig @@ -26,14 +26,23 @@ CONFIG_CGROUP_CPUACCT=y CONFIG_CGROUP_PERF=y CONFIG_NAMESPACES=y CONFIG_USER_NS=y +CONFIG_CHECKPOINT_RESTORE=y CONFIG_BLK_DEV_INITRD=y CONFIG_EXPERT=y # CONFIG_SYSFS_SYSCALL is not set -CONFIG_CHECKPOINT_RESTORE=y CONFIG_BPF_SYSCALL=y CONFIG_USERFAULTFD=y # CONFIG_COMPAT_BRK is not set CONFIG_PROFILING=y +CONFIG_LIVEPATCH=y +CONFIG_NR_CPUS=256 +CONFIG_NUMA=y +CONFIG_HZ_100=y +CONFIG_KEXEC_FILE=y +CONFIG_CRASH_DUMP=y +CONFIG_HIBERNATION=y +CONFIG_PM_DEBUG=y +CONFIG_CMM=m CONFIG_OPROFILE=y CONFIG_KPROBES=y CONFIG_JUMP_LABEL=y @@ -44,11 +53,7 @@ CONFIG_BLK_DEV_INTEGRITY=y CONFIG_PARTITION_ADVANCED=y CONFIG_IBM_PARTITION=y CONFIG_DEFAULT_DEADLINE=y -CONFIG_LIVEPATCH=y -CONFIG_NR_CPUS=256 -CONFIG_NUMA=y -CONFIG_HZ_100=y -CONFIG_KEXEC_FILE=y +CONFIG_BINFMT_MISC=m CONFIG_MEMORY_HOTPLUG=y CONFIG_MEMORY_HOTREMOVE=y CONFIG_KSM=y @@ -60,9 +65,6 @@ CONFIG_ZBUD=m CONFIG_ZSMALLOC=m CONFIG_ZSMALLOC_STAT=y CONFIG_IDLE_PAGE_TRACKING=y -CONFIG_CRASH_DUMP=y -CONFIG_BINFMT_MISC=m -CONFIG_HIBERNATION=y CONFIG_NET=y CONFIG_PACKET=y CONFIG_UNIX=y @@ -98,6 +100,7 @@ CONFIG_BLK_DEV_NBD=m CONFIG_BLK_DEV_RAM=y CONFIG_VIRTIO_BLK=y CONFIG_SCSI=y +# CONFIG_SCSI_MQ_DEFAULT is not set CONFIG_BLK_DEV_SD=y CONFIG_CHR_DEV_ST=y CONFIG_BLK_DEV_SR=y @@ -131,6 +134,7 @@ CONFIG_EQUALIZER=m CONFIG_TUN=m CONFIG_VIRTIO_NET=y # CONFIG_NET_VENDOR_ALACRITECH is not set +# CONFIG_NET_VENDOR_AURORA is not set # CONFIG_NET_VENDOR_CORTINA is not set # CONFIG_NET_VENDOR_SOLARFLARE is not set # CONFIG_NET_VENDOR_SOCIONEXT is not set @@ -157,33 +161,6 @@ CONFIG_TMPFS=y CONFIG_TMPFS_POSIX_ACL=y CONFIG_HUGETLBFS=y # CONFIG_NETWORK_FILESYSTEMS is not set -CONFIG_DEBUG_INFO=y -CONFIG_DEBUG_INFO_DWARF4=y -CONFIG_GDB_SCRIPTS=y -CONFIG_UNUSED_SYMBOLS=y -CONFIG_DEBUG_SECTION_MISMATCH=y -CONFIG_DEBUG_FORCE_WEAK_PER_CPU=y -CONFIG_MAGIC_SYSRQ=y -CONFIG_DEBUG_PAGEALLOC=y -CONFIG_DETECT_HUNG_TASK=y -CONFIG_PANIC_ON_OOPS=y -CONFIG_PROVE_LOCKING=y -CONFIG_LOCK_STAT=y -CONFIG_DEBUG_LOCKDEP=y -CONFIG_DEBUG_ATOMIC_SLEEP=y -CONFIG_DEBUG_LIST=y -CONFIG_DEBUG_SG=y -CONFIG_DEBUG_NOTIFIERS=y -CONFIG_RCU_CPU_STALL_TIMEOUT=60 -CONFIG_LATENCYTOP=y -CONFIG_SCHED_TRACER=y -CONFIG_FTRACE_SYSCALLS=y -CONFIG_TRACER_SNAPSHOT_PER_CPU_SWAP=y -CONFIG_STACK_TRACER=y -CONFIG_BLK_DEV_IO_TRACE=y -CONFIG_FUNCTION_PROFILER=y -# CONFIG_RUNTIME_TESTING_MENU is not set -CONFIG_S390_PTDUMP=y CONFIG_CRYPTO_CRYPTD=m CONFIG_CRYPTO_AUTHENC=m CONFIG_CRYPTO_TEST=m @@ -193,6 +170,7 @@ CONFIG_CRYPTO_CBC=y CONFIG_CRYPTO_CFB=m CONFIG_CRYPTO_CTS=m CONFIG_CRYPTO_LRW=m +CONFIG_CRYPTO_OFB=m CONFIG_CRYPTO_PCBC=m CONFIG_CRYPTO_XTS=m CONFIG_CRYPTO_CMAC=m @@ -231,7 +209,6 @@ CONFIG_CRYPTO_USER_API_HASH=m CONFIG_CRYPTO_USER_API_SKCIPHER=m CONFIG_CRYPTO_USER_API_RNG=m CONFIG_ZCRYPT=m -CONFIG_ZCRYPT_MULTIDEVNODES=y CONFIG_PKEY=m CONFIG_CRYPTO_PAES_S390=m CONFIG_CRYPTO_SHA1_S390=m @@ -247,4 +224,30 @@ CONFIG_CRC7=m # CONFIG_XZ_DEC_ARM is not set # CONFIG_XZ_DEC_ARMTHUMB is not set # CONFIG_XZ_DEC_SPARC is not set -CONFIG_CMM=m +CONFIG_DEBUG_INFO=y +CONFIG_DEBUG_INFO_DWARF4=y +CONFIG_GDB_SCRIPTS=y +CONFIG_UNUSED_SYMBOLS=y +CONFIG_DEBUG_SECTION_MISMATCH=y +CONFIG_DEBUG_FORCE_WEAK_PER_CPU=y +CONFIG_MAGIC_SYSRQ=y +CONFIG_DEBUG_PAGEALLOC=y +CONFIG_DETECT_HUNG_TASK=y +CONFIG_PANIC_ON_OOPS=y +CONFIG_PROVE_LOCKING=y +CONFIG_LOCK_STAT=y +CONFIG_DEBUG_LOCKDEP=y +CONFIG_DEBUG_ATOMIC_SLEEP=y +CONFIG_DEBUG_LIST=y +CONFIG_DEBUG_SG=y +CONFIG_DEBUG_NOTIFIERS=y +CONFIG_RCU_CPU_STALL_TIMEOUT=60 +CONFIG_LATENCYTOP=y +CONFIG_SCHED_TRACER=y +CONFIG_FTRACE_SYSCALLS=y +CONFIG_TRACER_SNAPSHOT_PER_CPU_SWAP=y +CONFIG_STACK_TRACER=y +CONFIG_BLK_DEV_IO_TRACE=y +CONFIG_FUNCTION_PROFILER=y +# CONFIG_RUNTIME_TESTING_MENU is not set +CONFIG_S390_PTDUMP=y From f55275bcc72948056387041abe9fce1c54300adc Mon Sep 17 00:00:00 2001 From: Olof Johansson <olof@lixom.net> Date: Mon, 5 Nov 2018 06:34:09 -0800 Subject: [PATCH 105/443] ARM: defconfig: Disable PREEMPT again on multi_v7 I should have let this soak for a while in linux-next, since we have at least one board that hit a regression from it. Revert from 4.20-rc, and we'll queue it for next merge window once regression is fixed. This reverts commit 513eb98595522bc0cb83831a9daee1d5738e66f1. Signed-off-by: Olof Johansson <olof@lixom.net> --- arch/arm/configs/multi_v7_defconfig | 1 - 1 file changed, 1 deletion(-) diff --git a/arch/arm/configs/multi_v7_defconfig b/arch/arm/configs/multi_v7_defconfig index 1c7616815a86a..63af6234c1b69 100644 --- a/arch/arm/configs/multi_v7_defconfig +++ b/arch/arm/configs/multi_v7_defconfig @@ -1,7 +1,6 @@ CONFIG_SYSVIPC=y CONFIG_NO_HZ=y CONFIG_HIGH_RES_TIMERS=y -CONFIG_PREEMPT=y CONFIG_CGROUPS=y CONFIG_BLK_DEV_INITRD=y CONFIG_EMBEDDED=y From ee474b81fe5aa5dc0faae920bf66240fbf55f891 Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu <mhiramat@kernel.org> Date: Thu, 1 Nov 2018 23:29:28 +0900 Subject: [PATCH 106/443] tracing/kprobes: Fix strpbrk() argument order Fix strpbrk()'s argument order, it must pass acceptable string in 2nd argument. Note that this can cause a kernel panic where it recovers backup character to code->data. Link: http://lkml.kernel.org/r/154108256792.2604.1816052586385217811.stgit@devbox Fixes: a6682814f371 ("tracing/kprobes: Allow kprobe-events to record module symbol") Signed-off-by: Masami Hiramatsu <mhiramat@kernel.org> Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org> --- kernel/trace/trace_probe.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/trace/trace_probe.c b/kernel/trace/trace_probe.c index 3ef15a6683c00..bd30e9398d2a8 100644 --- a/kernel/trace/trace_probe.c +++ b/kernel/trace/trace_probe.c @@ -535,7 +535,7 @@ int traceprobe_update_arg(struct probe_arg *arg) if (code[1].op != FETCH_OP_IMM) return -EINVAL; - tmp = strpbrk("+-", code->data); + tmp = strpbrk(code->data, "+-"); if (tmp) c = *tmp; ret = traceprobe_split_symbol_offset(code->data, From 79ef68c7e1f665578e005b454480b6eca60edabe Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo <acme@redhat.com> Date: Mon, 5 Nov 2018 12:23:40 -0300 Subject: [PATCH 107/443] perf augmented_syscalls: Start collecting pathnames in the BPF program This is the start of having the raw_syscalls:sys_enter BPF handler collecting pointer arguments, namely pathnames, and with two syscalls that have that pointer in different arguments, "open" as it as its first argument, "openat" as the second. With this in place the existing beautifiers in 'perf trace' works, those args are shown instead of just the pointer that comes with the syscalls tracepoints. This also serves to show and document pitfalls in the process of using just that place in the kernel (raw_syscalls:sys_enter) plus tables provided by userspace to collect syscall pointer arguments. One is the need to use a barrier, as suggested by Edward, to avoid clang optimizations that make the kernel BPF verifier to refuse loading our pointer contents collector. The end result should be a generic eBPF program that works in all architectures, with the differences amongst archs resolved by the userspace component, 'perf trace', that should get all its tables created automatically from the kernel components where they are defined, via string table constructors for things not expressed in BTF/DWARF (enums, structs, etc), and otherwise using those observability files (BTF). Cc: Adrian Hunter <adrian.hunter@intel.com> Cc: Alexei Starovoitov <ast@kernel.org> Cc: Daniel Borkmann <daniel@iogearbox.net> Cc: David Ahern <dsahern@gmail.com> Cc: Edward Cree <ecree@solarflare.com> Cc: Jiri Olsa <jolsa@kernel.org> Cc: Martin KaFai Lau <kafai@fb.com> Cc: Namhyung Kim <namhyung@kernel.org> Cc: Wang Nan <wangnan0@huawei.com> Cc: Yonghong Song <yhs@fb.com> Link: https://lkml.kernel.org/n/tip-37dz54pmotgpnwg9tb6zuk9j@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com> --- .../examples/bpf/augmented_raw_syscalls.c | 72 +++++++++++++++++++ 1 file changed, 72 insertions(+) diff --git a/tools/perf/examples/bpf/augmented_raw_syscalls.c b/tools/perf/examples/bpf/augmented_raw_syscalls.c index cde91c34b1016..90a19336310b0 100644 --- a/tools/perf/examples/bpf/augmented_raw_syscalls.c +++ b/tools/perf/examples/bpf/augmented_raw_syscalls.c @@ -37,15 +37,87 @@ struct syscall_exit_args { long ret; }; +struct augmented_filename { + unsigned int size; + int reserved; + char value[256]; +}; + +#define SYS_OPEN 2 +#define SYS_OPENAT 257 + SEC("raw_syscalls:sys_enter") int sys_enter(struct syscall_enter_args *args) { struct { struct syscall_enter_args args; + struct augmented_filename filename; } augmented_args; unsigned int len = sizeof(augmented_args); + const void *filename_arg = NULL; probe_read(&augmented_args.args, sizeof(augmented_args.args), args); + /* + * Yonghong and Edward Cree sayz: + * + * https://www.spinics.net/lists/netdev/msg531645.html + * + * >> R0=inv(id=0) R1=inv2 R6=ctx(id=0,off=0,imm=0) R7=inv64 R10=fp0,call_-1 + * >> 10: (bf) r1 = r6 + * >> 11: (07) r1 += 16 + * >> 12: (05) goto pc+2 + * >> 15: (79) r3 = *(u64 *)(r1 +0) + * >> dereference of modified ctx ptr R1 off=16 disallowed + * > Aha, we at least got a different error message this time. + * > And indeed llvm has done that optimisation, rather than the more obvious + * > 11: r3 = *(u64 *)(r1 +16) + * > because it wants to have lots of reads share a single insn. You may be able + * > to defeat that optimisation by adding compiler barriers, idk. Maybe someone + * > with llvm knowledge can figure out how to stop it (ideally, llvm would know + * > when it's generating for bpf backend and not do that). -O0? ¯\_(ツ)_/¯ + * + * The optimization mostly likes below: + * + * br1: + * ... + * r1 += 16 + * goto merge + * br2: + * ... + * r1 += 20 + * goto merge + * merge: + * *(u64 *)(r1 + 0) + * + * The compiler tries to merge common loads. There is no easy way to + * stop this compiler optimization without turning off a lot of other + * optimizations. The easiest way is to add barriers: + * + * __asm__ __volatile__("": : :"memory") + * + * after the ctx memory access to prevent their down stream merging. + */ + switch (augmented_args.args.syscall_nr) { + case SYS_OPEN: filename_arg = (const void *)args->args[0]; + __asm__ __volatile__("": : :"memory"); + break; + case SYS_OPENAT: filename_arg = (const void *)args->args[1]; + break; + } + + if (filename_arg != NULL) { + augmented_args.filename.reserved = 0; + augmented_args.filename.size = probe_read_str(&augmented_args.filename.value, + sizeof(augmented_args.filename.value), + filename_arg); + if (augmented_args.filename.size < sizeof(augmented_args.filename.value)) { + len -= sizeof(augmented_args.filename.value) - augmented_args.filename.size; + len &= sizeof(augmented_args.filename.value) - 1; + } + } else { + len = sizeof(augmented_args.args); + } + perf_event_output(args, &__augmented_syscalls__, BPF_F_CURRENT_CPU, &augmented_args, len); return 0; } From b42967dcac1d4f5b059ec25568136462bcb051fe Mon Sep 17 00:00:00 2001 From: Yi Wang <wang.yi59@zte.com.cn> Date: Mon, 29 Oct 2018 15:17:31 +0800 Subject: [PATCH 108/443] x86/hyper-v: Fix indentation in hv_do_fast_hypercall16() Remove the surplus TAB in hv_do_fast_hypercall16(). Signed-off-by: Yi Wang <wang.yi59@zte.com.cn> Signed-off-by: Thomas Gleixner <tglx@linutronix.de> Cc: kys@microsoft.com Cc: haiyangz@microsoft.com Cc: sthemmin@microsoft.com Cc: bp@alien8.de Cc: hpa@zytor.com Cc: devel@linuxdriverproject.org Cc: zhong.weidong@zte.com.cn Link: https://lkml.kernel.org/r/1540797451-2792-1-git-send-email-wang.yi59@zte.com.cn --- arch/x86/include/asm/mshyperv.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/include/asm/mshyperv.h b/arch/x86/include/asm/mshyperv.h index 0d6271cce198d..1d0a7778e1631 100644 --- a/arch/x86/include/asm/mshyperv.h +++ b/arch/x86/include/asm/mshyperv.h @@ -232,7 +232,7 @@ static inline u64 hv_do_fast_hypercall16(u16 code, u64 input1, u64 input2) : "cc"); } #endif - return hv_status; + return hv_status; } /* From 437e88ab8f9e2ad90576ab74c4cf8f527bbf51cd Mon Sep 17 00:00:00 2001 From: Nathan Chancellor <natechancellor@gmail.com> Date: Tue, 23 Oct 2018 16:11:25 -0700 Subject: [PATCH 109/443] x86/build: Remove -pipe from KBUILD_CFLAGS Commit 77b0bf55bc67 ("kbuild/Makefile: Prepare for using macros in inline assembly code to work around asm() related GCC inlining bugs") added -Wa,- to KBUILD_CFLAGS, which breaks compiling with Clang (hangs indefinitely at compiling init/main.o). This happens because while Clang accepts -pipe (and has it documented in its list of supported flags), it silently ignores it after this 2010 commit (thanks to Nick Desaulniers for tracking this down), meaning that gas just infinitely waits for stdin and never receives it. https://github.com/llvm-mirror/clang/commit/c19a12dc3d441bec62eed55e312b76c12d6d9022 Initially, I had suggested just add -Wa,- to KBUILD_CFLAGS when GCC was being used but that was before realizing it is because Clang doesn't do anything with -pipe. H. Peter Anvin suggested checking to see if -pipe gives us any gains out of GCC. Turns out it might actually be hurting: With -pipe: real 3m40.813s real 3m44.449s real 3m39.648s Without -pipe: real 3m38.492s real 3m38.335s real 3m38.975s The issue of -Wa,- being passed along to gas without -pipe being supported should still probably be fixed on the LLVM side (open issue: https://bugs.llvm.org/show_bug.cgi?id=39410) but this is not as much of a workaround anymore since it helps both GCC and Clang. Suggested-by: H. Peter Anvin <hpa@zytor.com> Signed-off-by: Nathan Chancellor <natechancellor@gmail.com> Signed-off-by: Thomas Gleixner <tglx@linutronix.de> Tested-by: Nick Desaulniers <ndesaulniers@google.com> Reviewed-by: Nadav Amit <namit@vmware.com> Reviewed-by: Nick Desaulniers <ndesaulniers@google.com> Cc: Borislav Petkov <bp@alien8.de> Cc: Kees Cook <keescook@chromium.org> Cc: Masahiro Yamada <yamada.masahiro@socionext.com> Link: https://github.com/ClangBuiltLinux/linux/issues/213 Link: https://lkml.kernel.org/r/20181023231125.27976-1-natechancellor@gmail.com --- arch/x86/Makefile | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/arch/x86/Makefile b/arch/x86/Makefile index 5b562e4640099..88398fdf81291 100644 --- a/arch/x86/Makefile +++ b/arch/x86/Makefile @@ -213,8 +213,6 @@ ifdef CONFIG_X86_64 KBUILD_LDFLAGS += $(call ld-option, -z max-page-size=0x200000) endif -# Speed up the build -KBUILD_CFLAGS += -pipe # Workaround for a gcc prelease that unfortunately was shipped in a suse release KBUILD_CFLAGS += -Wno-sign-compare # @@ -239,7 +237,7 @@ archheaders: archmacros: $(Q)$(MAKE) $(build)=arch/x86/kernel arch/x86/kernel/macros.s -ASM_MACRO_FLAGS = -Wa,arch/x86/kernel/macros.s -Wa,- +ASM_MACRO_FLAGS = -Wa,arch/x86/kernel/macros.s export ASM_MACRO_FLAGS KBUILD_CFLAGS += $(ASM_MACRO_FLAGS) From 163c8d54a997153ee1a1e07fcac087492ad85b37 Mon Sep 17 00:00:00 2001 From: Martin Schwidefsky <schwidefsky@de.ibm.com> Date: Mon, 5 Nov 2018 07:36:28 +0100 Subject: [PATCH 110/443] compiler: remove __no_sanitize_address_or_inline again The __no_sanitize_address_or_inline and __no_kasan_or_inline defines are almost identical. The only difference is that __no_kasan_or_inline does not have the 'notrace' attribute. To be able to replace __no_sanitize_address_or_inline with the older definition, add 'notrace' to __no_kasan_or_inline and change to two users of __no_sanitize_address_or_inline in the s390 code. The 'notrace' option is necessary for e.g. the __load_psw_mask function in arch/s390/include/asm/processor.h. Without the option it is possible to trace __load_psw_mask which leads to kernel stack overflow. Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com> Pointed-out-by: Andrey Ryabinin <aryabinin@virtuozzo.com> Acked-by: Steven Rostedt (VMware) <rostedt@goodmis.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org> --- arch/s390/include/asm/processor.h | 4 ++-- include/linux/compiler-gcc.h | 12 ------------ include/linux/compiler.h | 2 +- 3 files changed, 3 insertions(+), 15 deletions(-) diff --git a/arch/s390/include/asm/processor.h b/arch/s390/include/asm/processor.h index 302795c47c06c..81038ab357ce9 100644 --- a/arch/s390/include/asm/processor.h +++ b/arch/s390/include/asm/processor.h @@ -236,7 +236,7 @@ static inline unsigned long current_stack_pointer(void) return sp; } -static __no_sanitize_address_or_inline unsigned short stap(void) +static __no_kasan_or_inline unsigned short stap(void) { unsigned short cpu_address; @@ -330,7 +330,7 @@ static inline void __load_psw(psw_t psw) * Set PSW mask to specified value, while leaving the * PSW addr pointing to the next instruction. */ -static __no_sanitize_address_or_inline void __load_psw_mask(unsigned long mask) +static __no_kasan_or_inline void __load_psw_mask(unsigned long mask) { unsigned long addr; psw_t psw; diff --git a/include/linux/compiler-gcc.h b/include/linux/compiler-gcc.h index c0f5db3a96217..2010493e10408 100644 --- a/include/linux/compiler-gcc.h +++ b/include/linux/compiler-gcc.h @@ -143,18 +143,6 @@ #define KASAN_ABI_VERSION 3 #endif -/* - * Because __no_sanitize_address conflicts with inlining: - * https://gcc.gnu.org/bugzilla/show_bug.cgi?id=67368 - * we do one or the other. - */ -#ifdef CONFIG_KASAN -#define __no_sanitize_address_or_inline \ - __no_sanitize_address __maybe_unused notrace -#else -#define __no_sanitize_address_or_inline inline -#endif - #if GCC_VERSION >= 50100 #define COMPILER_HAS_GENERIC_BUILTIN_OVERFLOW 1 #endif diff --git a/include/linux/compiler.h b/include/linux/compiler.h index 18c80cfa4fc48..06396c1cf127f 100644 --- a/include/linux/compiler.h +++ b/include/linux/compiler.h @@ -189,7 +189,7 @@ void __read_once_size(const volatile void *p, void *res, int size) * https://gcc.gnu.org/bugzilla/show_bug.cgi?id=67368 * '__maybe_unused' allows us to avoid defined-but-not-used warnings. */ -# define __no_kasan_or_inline __no_sanitize_address __maybe_unused +# define __no_kasan_or_inline __no_sanitize_address notrace __maybe_unused #else # define __no_kasan_or_inline __always_inline #endif From 21b42eb46834f6245a55ac77bdf3e14c034e2864 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada <yamada.masahiro@socionext.com> Date: Mon, 5 Nov 2018 16:51:49 +0900 Subject: [PATCH 111/443] kbuild: rpm-pkg: fix binrpm-pkg breakage when O= is used Zhenzhong Duan reported that running 'make O=/build/kernel binrpm-pkg' failed with the following errors: Running 'make O=/build/kernel binrpm-pkg' failed with below two errors. Makefile:600: include/config/auto.conf: No such file or directory + cp make -C /mnt/root/kernel O=/build/kernel image_name make -f /mnt/root/kernel/Makefile ... cp: invalid option -- 'C' Try 'cp --help' for more information. Prior to commit 80463f1b7bf9 ("kbuild: add --include-dir flag only for out-of-tree build"), both srctree and objtree were added to --include-dir redundantly, and the wrong code 'make image_name' was working by relying on that. Now, the potential issue that had previously been hidden just showed up. 'make image_name' recurses to the generated $(objtree)/Makefile and ends up with running in srctree, which is incorrect. It should be invoked with '-f $srctree/Makefile' (or KBUILD_SRC=) to be executed in objtree. Fixes: 80463f1b7bf9 ("kbuild: add --include-dir flag only for out-of-tree build") Reported-by: Zhenzhong Duan <zhenzhong.duan@oracle.com> Signed-off-by: Masahiro Yamada <yamada.masahiro@socionext.com> --- scripts/package/mkspec | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/scripts/package/mkspec b/scripts/package/mkspec index e05646dc24dcf..009147d4718ee 100755 --- a/scripts/package/mkspec +++ b/scripts/package/mkspec @@ -12,6 +12,7 @@ # how we were called determines which rpms we build and how we build them if [ "$1" = prebuilt ]; then S=DEL + MAKE="$MAKE -f $srctree/Makefile" else S= fi @@ -78,19 +79,19 @@ $S %prep $S %setup -q $S $S %build -$S make %{?_smp_mflags} KBUILD_BUILD_VERSION=%{release} +$S $MAKE %{?_smp_mflags} KBUILD_BUILD_VERSION=%{release} $S %install mkdir -p %{buildroot}/boot %ifarch ia64 mkdir -p %{buildroot}/boot/efi - cp \$(make image_name) %{buildroot}/boot/efi/vmlinuz-$KERNELRELEASE + cp \$($MAKE image_name) %{buildroot}/boot/efi/vmlinuz-$KERNELRELEASE ln -s efi/vmlinuz-$KERNELRELEASE %{buildroot}/boot/ %else - cp \$(make image_name) %{buildroot}/boot/vmlinuz-$KERNELRELEASE + cp \$($MAKE image_name) %{buildroot}/boot/vmlinuz-$KERNELRELEASE %endif -$M make %{?_smp_mflags} INSTALL_MOD_PATH=%{buildroot} KBUILD_SRC= modules_install - make %{?_smp_mflags} INSTALL_HDR_PATH=%{buildroot}/usr KBUILD_SRC= headers_install +$M $MAKE %{?_smp_mflags} INSTALL_MOD_PATH=%{buildroot} modules_install + $MAKE %{?_smp_mflags} INSTALL_HDR_PATH=%{buildroot}/usr headers_install cp System.map %{buildroot}/boot/System.map-$KERNELRELEASE cp .config %{buildroot}/boot/config-$KERNELRELEASE bzip2 -9 --keep vmlinux From 02826a6ba301b72461c3706e1cc66d5571cd327e Mon Sep 17 00:00:00 2001 From: Masahiro Yamada <yamada.masahiro@socionext.com> Date: Mon, 5 Nov 2018 16:52:34 +0900 Subject: [PATCH 112/443] kbuild: deb-pkg: fix bindeb-pkg breakage when O= is used Ard Biesheuvel reports bindeb-pkg with O= option is broken in the following way: ... LD [M] sound/soc/rockchip/snd-soc-rk3399-gru-sound.ko LD [M] sound/soc/rockchip/snd-soc-rockchip-pcm.ko LD [M] sound/soc/rockchip/snd-soc-rockchip-rt5645.ko LD [M] sound/soc/rockchip/snd-soc-rockchip-spdif.ko LD [M] sound/soc/sh/rcar/snd-soc-rcar.ko fakeroot -u debian/rules binary make KERNELRELEASE=4.19.0-12677-g19beffaf7a99-dirty ARCH=arm64 KBUILD_SRC= intdeb-pkg /bin/bash /home/ard/linux/scripts/package/builddeb Makefile:600: include/config/auto.conf: No such file or directory *** *** Configuration file ".config" not found! *** *** Please run some configurator (e.g. "make oldconfig" or *** "make menuconfig" or "make xconfig"). *** make[12]: *** [syncconfig] Error 1 make[11]: *** [syncconfig] Error 2 make[10]: *** [include/config/auto.conf] Error 2 make[9]: *** [__sub-make] Error 2 ... Prior to commit 80463f1b7bf9 ("kbuild: add --include-dir flag only for out-of-tree build"), both srctree and objtree were added to --include-dir redundantly, and the wrong code '$MAKE image_name' was working by relying on that. Now, the potential issue that had previously been hidden just showed up. '$MAKE image_name' recurses to the generated $(objtree)/Makefile and ends up with running in srctree, which is incorrect. It should be invoked with '-f $srctree/Makefile' (or KBUILD_SRC=) to be executed in objtree. Fixes: 80463f1b7bf9 ("kbuild: add --include-dir flag only for out-of-tree build") Reported-by: Ard Biesheuvel <ard.biesheuvel@linaro.org> Signed-off-by: Masahiro Yamada <yamada.masahiro@socionext.com> Tested-by: Ard Biesheuvel <ard.biesheuvel@linaro.org> --- scripts/package/builddeb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/package/builddeb b/scripts/package/builddeb index 90c9a8ac7adb8..0b31f4f1f92cf 100755 --- a/scripts/package/builddeb +++ b/scripts/package/builddeb @@ -81,7 +81,7 @@ else cp System.map "$tmpdir/boot/System.map-$version" cp $KCONFIG_CONFIG "$tmpdir/boot/config-$version" fi -cp "$($MAKE -s image_name)" "$tmpdir/$installed_image_path" +cp "$($MAKE -s -f $srctree/Makefile image_name)" "$tmpdir/$installed_image_path" if grep -q "^CONFIG_OF=y" $KCONFIG_CONFIG ; then # Only some architectures with OF support have this target From 5d7a5bcb67c70cbc904057ef52d3fcfeb24420bb Mon Sep 17 00:00:00 2001 From: Frank Sorenson <sorenson@redhat.com> Date: Tue, 30 Oct 2018 15:10:40 -0500 Subject: [PATCH 113/443] sunrpc: correct the computation for page_ptr when truncating When truncating the encode buffer, the page_ptr is getting advanced, causing the next page to be skipped while encoding. The page is still included in the response, so the response contains a page of bogus data. We need to adjust the page_ptr backwards to ensure we encode the next page into the correct place. We saw this triggered when concurrent directory modifications caused nfsd4_encode_direct_fattr() to return nfserr_noent, and the resulting call to xdr_truncate_encode() corrupted the READDIR reply. Signed-off-by: Frank Sorenson <sorenson@redhat.com> Cc: stable@vger.kernel.org Signed-off-by: J. Bruce Fields <bfields@redhat.com> --- net/sunrpc/xdr.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/net/sunrpc/xdr.c b/net/sunrpc/xdr.c index 2bbb8d38d2bf5..5cfb9e0a18dcb 100644 --- a/net/sunrpc/xdr.c +++ b/net/sunrpc/xdr.c @@ -673,11 +673,10 @@ void xdr_truncate_encode(struct xdr_stream *xdr, size_t len) WARN_ON_ONCE(xdr->iov); return; } - if (fraglen) { + if (fraglen) xdr->end = head->iov_base + head->iov_len; - xdr->page_ptr--; - } /* (otherwise assume xdr->end is already set) */ + xdr->page_ptr--; head->iov_len = len; buf->len = len; xdr->p = head->iov_base + head->iov_len; From 6fce3a406108ee6c8a61e2a33e52e9198a626ea0 Mon Sep 17 00:00:00 2001 From: Lucas Stach <l.stach@pengutronix.de> Date: Thu, 4 Oct 2018 11:37:00 +0200 Subject: [PATCH 114/443] drm/etnaviv: fix bogus fence complete check in timeout handler The GPU hardware fences and the job out-fences are on different timelines so it's wrong to compare them. Fix this by only looking at the out-fence. Cc: <stable@vger.kernel.org> Fixes: 2c83a726d6fb (drm/etnaviv: bring back progress check in job timeout handler) Signed-off-by: Lucas Stach <l.stach@pengutronix.de> --- drivers/gpu/drm/etnaviv/etnaviv_sched.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/etnaviv/etnaviv_sched.c b/drivers/gpu/drm/etnaviv/etnaviv_sched.c index 69e9b431bf1f0..e5a9fae31ab7b 100644 --- a/drivers/gpu/drm/etnaviv/etnaviv_sched.c +++ b/drivers/gpu/drm/etnaviv/etnaviv_sched.c @@ -93,7 +93,7 @@ static void etnaviv_sched_timedout_job(struct drm_sched_job *sched_job) * If the GPU managed to complete this jobs fence, the timout is * spurious. Bail out. */ - if (fence_completed(gpu, submit->out_fence->seqno)) + if (dma_fence_is_signaled(submit->out_fence)) return; /* From c3537fc251503af18085b8f84126d13743663970 Mon Sep 17 00:00:00 2001 From: Andi Kleen <ak@linux.intel.com> Date: Mon, 1 Oct 2018 12:59:26 -0700 Subject: [PATCH 115/443] perf evlist: Move perf_evsel__reset_weak_group into evlist - Move the function from builtin-stat to evlist for reuse - Rename to evlist to match purpose better - Pass the evlist as first argument. - No functional changes Signed-off-by: Andi Kleen <ak@linux.intel.com> Acked-by: Jiri Olsa <jolsa@kernel.org> Link: http://lkml.kernel.org/r/20181001195927.14211-1-andi@firstfloor.org Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com> --- tools/perf/builtin-stat.c | 28 +--------------------------- tools/perf/util/evlist.c | 27 +++++++++++++++++++++++++++ tools/perf/util/evlist.h | 3 +++ 3 files changed, 31 insertions(+), 27 deletions(-) diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index d1028d7755bbc..a635abfa77b6a 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c @@ -383,32 +383,6 @@ static bool perf_evsel__should_store_id(struct perf_evsel *counter) return STAT_RECORD || counter->attr.read_format & PERF_FORMAT_ID; } -static struct perf_evsel *perf_evsel__reset_weak_group(struct perf_evsel *evsel) -{ - struct perf_evsel *c2, *leader; - bool is_open = true; - - leader = evsel->leader; - pr_debug("Weak group for %s/%d failed\n", - leader->name, leader->nr_members); - - /* - * for_each_group_member doesn't work here because it doesn't - * include the first entry. - */ - evlist__for_each_entry(evsel_list, c2) { - if (c2 == evsel) - is_open = false; - if (c2->leader == leader) { - if (is_open) - perf_evsel__close(c2); - c2->leader = c2; - c2->nr_members = 0; - } - } - return leader; -} - static bool is_target_alive(struct target *_target, struct thread_map *threads) { @@ -477,7 +451,7 @@ static int __run_perf_stat(int argc, const char **argv, int run_idx) if ((errno == EINVAL || errno == EBADF) && counter->leader != counter && counter->weak_group) { - counter = perf_evsel__reset_weak_group(counter); + counter = perf_evlist__reset_weak_group(evsel_list, counter); goto try_again; } diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c index e88e6f9b1463f..668d2a9ef0f4b 100644 --- a/tools/perf/util/evlist.c +++ b/tools/perf/util/evlist.c @@ -1810,3 +1810,30 @@ void perf_evlist__force_leader(struct perf_evlist *evlist) leader->forced_leader = true; } } + +struct perf_evsel *perf_evlist__reset_weak_group(struct perf_evlist *evsel_list, + struct perf_evsel *evsel) +{ + struct perf_evsel *c2, *leader; + bool is_open = true; + + leader = evsel->leader; + pr_debug("Weak group for %s/%d failed\n", + leader->name, leader->nr_members); + + /* + * for_each_group_member doesn't work here because it doesn't + * include the first entry. + */ + evlist__for_each_entry(evsel_list, c2) { + if (c2 == evsel) + is_open = false; + if (c2->leader == leader) { + if (is_open) + perf_evsel__close(c2); + c2->leader = c2; + c2->nr_members = 0; + } + } + return leader; +} diff --git a/tools/perf/util/evlist.h b/tools/perf/util/evlist.h index dc66436add98a..9919eed6d15bc 100644 --- a/tools/perf/util/evlist.h +++ b/tools/perf/util/evlist.h @@ -312,4 +312,7 @@ bool perf_evlist__exclude_kernel(struct perf_evlist *evlist); void perf_evlist__force_leader(struct perf_evlist *evlist); +struct perf_evsel *perf_evlist__reset_weak_group(struct perf_evlist *evlist, + struct perf_evsel *evsel); + #endif /* __PERF_EVLIST_H */ From cf99ad1424c54fc84b84d3a3deb57a48c340c30a Mon Sep 17 00:00:00 2001 From: Andi Kleen <ak@linux.intel.com> Date: Mon, 1 Oct 2018 12:59:27 -0700 Subject: [PATCH 116/443] perf record: Support weak groups Implement a weak group fallback for 'perf record', similar to the existing 'perf stat' support. This allows to use groups that might be longer than the available counters without failing. Before: $ perf record -e '{cycles,cache-misses,cache-references,cpu_clk_unhalted.thread,cycles,cycles,cycles}' -a sleep 1 Error: The sys_perf_event_open() syscall returned with 22 (Invalid argument) for event (cycles). /bin/dmesg | grep -i perf may provide additional information. After: $ ./perf record -e '{cycles,cache-misses,cache-references,cpu_clk_unhalted.thread,cycles,cycles,cycles}:W' -a sleep 1 WARNING: No sample_id_all support, falling back to unordered processing [ perf record: Woken up 3 times to write data ] [ perf record: Captured and wrote 8.136 MB perf.data (134069 samples) ] Signed-off-by: Andi Kleen <ak@linux.intel.com> Acked-by: Jiri Olsa <jolsa@kernel.org> Link: http://lkml.kernel.org/r/20181001195927.14211-2-andi@firstfloor.org Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com> --- tools/perf/Documentation/perf-list.txt | 1 - tools/perf/builtin-record.c | 7 ++++++- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/tools/perf/Documentation/perf-list.txt b/tools/perf/Documentation/perf-list.txt index 236b9b97dfdb1..667c14e56031b 100644 --- a/tools/perf/Documentation/perf-list.txt +++ b/tools/perf/Documentation/perf-list.txt @@ -55,7 +55,6 @@ counted. The following modifiers exist: S - read sample value (PERF_SAMPLE_READ) D - pin the event to the PMU W - group is weak and will fallback to non-group if not schedulable, - only supported in 'perf stat' for now. The 'p' modifier can be used for specifying how precise the instruction address should be. The 'p' modifier can be specified multiple times: diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c index 10cf889c6d75d..488779bc4c8d2 100644 --- a/tools/perf/builtin-record.c +++ b/tools/perf/builtin-record.c @@ -391,7 +391,12 @@ static int record__open(struct record *rec) ui__warning("%s\n", msg); goto try_again; } - + if ((errno == EINVAL || errno == EBADF) && + pos->leader != pos && + pos->weak_group) { + pos = perf_evlist__reset_weak_group(evlist, pos); + goto try_again; + } rc = -errno; perf_evsel__open_strerror(pos, &opts->target, errno, msg, sizeof(msg)); From ea1fa48c055f833eb25f0c33188feecb7002ada5 Mon Sep 17 00:00:00 2001 From: Thomas Richter <tmricht@linux.ibm.com> Date: Tue, 23 Oct 2018 17:16:16 +0200 Subject: [PATCH 117/443] perf stat: Handle different PMU names with common prefix On s390 the CPU Measurement Facility for counters now supports 2 PMUs named cpum_cf (CPU Measurement Facility for counters) and cpum_cf_diag (CPU Measurement Facility for diagnostic counters) for one and the same CPU. Running command [root@s35lp76 perf]# ./perf stat -e tx_c_tend \ -- ~/mytests/cf-tx-events 1 Measuring transactions TX_C_TABORT_NO_SPECIAL: 0 expected:0 TX_C_TABORT_SPECIAL: 0 expected:0 TX_C_TEND: 1 expected:1 TX_NC_TABORT: 11 expected:11 TX_NC_TEND: 1 expected:1 Performance counter stats for '/root/mytests/cf-tx-events 1': 2 tx_c_tend 0.002120091 seconds time elapsed 0.000121000 seconds user 0.002127000 seconds sys [root@s35lp76 perf]# displays output which is unexpected (and wrong): 2 tx_c_tend The test program definitely triggers only one transaction, as shown in line 'TX_C_TEND: 1 expected:1'. This is caused by the following call sequence: pmu_lookup() scans and installs a PMU. +--> pmu_aliases() parses all aliases in directory .../<pmu-name>/events/* which are file names. +--> pmu_aliases_parse() Read each file in directory and create an new alias entry. This is done with +--> perf_pmu__new_alias() and +--> __perf_pmu__new_alias() which also check for identical alias names. After pmu_aliases() returns, a complete list of event names for this pmu has been created. Now function pmu_add_cpu_aliases() is called to add the events listed in the json | files to the alias list of the cpu. +--> perf_pmu__find_map() Returns a pointer to the json events. Now function pmu_add_cpu_aliases() scans through all events listed in the JSON files for this CPU. Each json event pmu name is compared with the current PMU being built up and if they mismatch, the json event is added to the current PMUs alias list. To avoid duplicate entries the following comparison is done: if (!is_arm_pmu_core(name)) { pname = pe->pmu ? pe->pmu : "cpu"; if (strncmp(pname, name, strlen(pname))) continue; } The culprit is the strncmp() function. Using current s390 PMU naming, the first PMU is 'cpum_cf' and a long list of events is added, among them 'tx_c_tend' When the second PMU named 'cpum_cf_diag' is added, only one event named 'CF_DIAG' is added by the pmu_aliases() function. Now function pmu_add_cpu_aliases() is invoked for PMU 'cpum_cf_diag'. Since the CPUID string is the same for both PMUs, json file events for PMU named 'cpum_cf' are added to the PMU 'cpm_cf_diag' This happens because the strncmp() actually compares: strncmp("cpum_cf", "cpum_cf_diag", 6); The first parameter is the pmu name taken from the event in the json file. The second parameter is the pmu name of the PMU currently being built. They are different, but the length of the compare only tests the common prefix and this returns 0(true) when it should return false. Now all events for PMU cpum_cf are added to the alias list for pmu cpum_cf_diag. Later on in function parse_events_add_pmu() the event 'tx_c_end' is searched in all available PMUs and found twice, adding it two times to the evsel_list global variable which is the root of all events. This results in a counter value of 2 instead of 1. Output with this patch: [root@s35lp76 perf]# ./perf stat -e tx_c_tend \ -- ~/mytests/cf-tx-events 1 Measuring transactions TX_C_TABORT_NO_SPECIAL: 0 expected:0 TX_C_TABORT_SPECIAL: 0 expected:0 TX_C_TEND: 1 expected:1 TX_NC_TABORT: 11 expected:11 TX_NC_TEND: 1 expected:1 Performance counter stats for '/root/mytests/cf-tx-events 1': 1 tx_c_tend 0.001815365 seconds time elapsed 0.000123000 seconds user 0.001756000 seconds sys [root@s35lp76 perf]# Signed-off-by: Thomas Richter <tmricht@linux.ibm.com> Reviewed-by: Hendrik Brueckner <brueckner@linux.ibm.com> Reviewed-by: Sebastien Boisvert <sboisvert@gydle.com> Cc: Heiko Carstens <heiko.carstens@de.ibm.com> Cc: Kan Liang <kan.liang@linux.intel.com> Cc: Martin Schwidefsky <schwidefsky@de.ibm.com> Cc: stable@vger.kernel.org Fixes: 292c34c10249 ("perf pmu: Fix core PMU alias list for X86 platform") Link: http://lkml.kernel.org/r/20181023151616.78193-1-tmricht@linux.ibm.com Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com> --- tools/perf/util/pmu.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/perf/util/pmu.c b/tools/perf/util/pmu.c index 7799788f662fd..7e49baad304d7 100644 --- a/tools/perf/util/pmu.c +++ b/tools/perf/util/pmu.c @@ -773,7 +773,7 @@ static void pmu_add_cpu_aliases(struct list_head *head, struct perf_pmu *pmu) if (!is_arm_pmu_core(name)) { pname = pe->pmu ? pe->pmu : "cpu"; - if (strncmp(pname, name, strlen(pname))) + if (strcmp(pname, name)) continue; } From 590ac60d8aa929bd21e35cd95a7d8720d00eb4f3 Mon Sep 17 00:00:00 2001 From: Jin Yao <yao.jin@linux.intel.com> Date: Wed, 31 Oct 2018 19:06:35 +0800 Subject: [PATCH 118/443] perf top: Display the LBR stats in callchain entry 'perf report' has supported the displaying of LBR stats (such as cycles, predicted%) in callchain entry. For example: $ perf report --branch-history --stdio --1.01%--intel_idle mwait.h:29 intel_idle cpufeature.h:164 (cycles:5) intel_idle cpufeature.h:164 (predicted:76.4%) intel_idle mwait.h:102 (cycles:41) intel_idle current.h:15 While 'perf top' doesn't support that. For example: $ perf top -a -b --call-graph branch - 13.86% 0.23% [kernel] [k] __x86_indirect_thunk_rax - 13.65% __x86_indirect_thunk_rax + 1.69% do_syscall_64 + 1.68% do_select + 1.41% ktime_get + 0.70% __schedule + 0.62% do_sys_poll 0.58% __x86_indirect_thunk_rax Actually it's very easy to enable this feature in 'perf top'. With this patch, the result is: $ perf top -a -b --call-graph branch $ - 13.58% 0.00% [kernel] [k] __x86_indirect_thunk_rax $ - 13.57% __x86_indirect_thunk_rax (predicted:93.9%) $ + 1.78% do_select (cycles:2) $ + 1.68% perf_pmu_disable.part.99 (cycles:1) $ + 1.45% ___sys_recvmsg (cycles:25) $ + 0.81% unix_stream_sendmsg (cycles:18) $ + 0.80% ktime_get (cycles:400) $ 0.58% pick_next_task_fair (cycles:47) $ + 0.56% i915_request_retire (cycles:2) $ + 0.52% do_sys_poll (cycles:4) Signed-off-by: Jin Yao <yao.jin@linux.intel.com> Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com> Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com> Cc: Andi Kleen <ak@linux.intel.com> Cc: Jiri Olsa <jolsa@kernel.org> Cc: Kan Liang <kan.liang@linux.intel.com> Cc: Peter Zijlstra <peterz@infradead.org> Link: http://lkml.kernel.org/r/1540983995-20462-1-git-send-email-yao.jin@linux.intel.com Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com> --- tools/perf/builtin-top.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c index b2838de13de02..aa0c73e579240 100644 --- a/tools/perf/builtin-top.c +++ b/tools/perf/builtin-top.c @@ -1429,6 +1429,9 @@ int cmd_top(int argc, const char **argv) } } + if (opts->branch_stack && callchain_param.enabled) + symbol_conf.show_branchflag_count = true; + sort__mode = SORT_MODE__TOP; /* display thread wants entries to be collapsed in a different tree */ perf_hpp_list.need_collapse = 1; From 5ed4419d47f8ba6bbccd8e3203276b3c39a792b7 Mon Sep 17 00:00:00 2001 From: Adrian Hunter <adrian.hunter@intel.com> Date: Sun, 4 Nov 2018 17:12:35 +0200 Subject: [PATCH 119/443] perf scripts python: exported-sql-viewer.py: Fall back to /usr/local/lib/libxed.so Fall back to /usr/local/lib/libxed.so to cater for distributions that do not have /usr/local/lib in the library path by default. Signed-off-by: Adrian Hunter <adrian.hunter@intel.com> Cc: Andi Kleen <ak@linux.intel.com> Cc: Jiri Olsa <jolsa@redhat.com> Link: http://lkml.kernel.org/r/20181104151238.15947-2-adrian.hunter@intel.com Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com> --- tools/perf/scripts/python/exported-sql-viewer.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/tools/perf/scripts/python/exported-sql-viewer.py b/tools/perf/scripts/python/exported-sql-viewer.py index 24cb0bd56afa5..20cc8e7879b97 100755 --- a/tools/perf/scripts/python/exported-sql-viewer.py +++ b/tools/perf/scripts/python/exported-sql-viewer.py @@ -1929,7 +1929,12 @@ def __init__(self, libxed): class LibXED(): def __init__(self): - self.libxed = CDLL("libxed.so") + try: + self.libxed = CDLL("libxed.so") + except: + self.libxed = None + if not self.libxed: + self.libxed = CDLL("/usr/local/lib/libxed.so") self.xed_tables_init = self.libxed.xed_tables_init self.xed_tables_init.restype = None From 210cf1f96185f0c6383df8b6030e3d2945e1b41a Mon Sep 17 00:00:00 2001 From: Adrian Hunter <adrian.hunter@intel.com> Date: Sun, 4 Nov 2018 17:12:36 +0200 Subject: [PATCH 120/443] perf scripts python: exported-sql-viewer.py: Add Selected branches report Fetching data from the database can be slow. Add a report that provides the ability to select a subset of branches. Signed-off-by: Adrian Hunter <adrian.hunter@intel.com> Cc: Andi Kleen <ak@linux.intel.com> Cc: Jiri Olsa <jolsa@redhat.com> Link: http://lkml.kernel.org/r/20181104151238.15947-3-adrian.hunter@intel.com Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com> --- .../scripts/python/exported-sql-viewer.py | 327 ++++++++++++++++++ 1 file changed, 327 insertions(+) diff --git a/tools/perf/scripts/python/exported-sql-viewer.py b/tools/perf/scripts/python/exported-sql-viewer.py index 20cc8e7879b97..a9d2b3170141f 100755 --- a/tools/perf/scripts/python/exported-sql-viewer.py +++ b/tools/perf/scripts/python/exported-sql-viewer.py @@ -119,6 +119,14 @@ def dsoname(name): return "[kernel]" return name +def findnth(s, sub, n, offs=0): + pos = s.find(sub) + if pos < 0: + return pos + if n <= 1: + return offs + pos + return findnth(s[pos + 1:], sub, n - 1, offs + pos + 1) + # Percent to one decimal place def PercentToOneDP(n, d): @@ -1464,6 +1472,317 @@ def FindDone(self, row): else: self.find_bar.NotFound() +# Dialog data item converted and validated using a SQL table + +class SQLTableDialogDataItem(): + + def __init__(self, glb, label, placeholder_text, table_name, match_column, column_name1, column_name2, parent): + self.glb = glb + self.label = label + self.placeholder_text = placeholder_text + self.table_name = table_name + self.match_column = match_column + self.column_name1 = column_name1 + self.column_name2 = column_name2 + self.parent = parent + + self.value = "" + + self.widget = QLineEdit() + self.widget.editingFinished.connect(self.Validate) + self.widget.textChanged.connect(self.Invalidate) + self.red = False + self.error = "" + self.validated = True + + self.last_id = 0 + self.first_time = 0 + self.last_time = 2 ** 64 + if self.table_name == "<timeranges>": + query = QSqlQuery(self.glb.db) + QueryExec(query, "SELECT id, time FROM samples ORDER BY id DESC LIMIT 1") + if query.next(): + self.last_id = int(query.value(0)) + self.last_time = int(query.value(1)) + QueryExec(query, "SELECT time FROM samples WHERE time != 0 ORDER BY id LIMIT 1") + if query.next(): + self.first_time = int(query.value(0)) + if placeholder_text: + placeholder_text += ", between " + str(self.first_time) + " and " + str(self.last_time) + + if placeholder_text: + self.widget.setPlaceholderText(placeholder_text) + + def ValueToIds(self, value): + ids = [] + query = QSqlQuery(self.glb.db) + stmt = "SELECT id FROM " + self.table_name + " WHERE " + self.match_column + " = '" + value + "'" + ret = query.exec_(stmt) + if ret: + while query.next(): + ids.append(str(query.value(0))) + return ids + + def IdBetween(self, query, lower_id, higher_id, order): + QueryExec(query, "SELECT id FROM samples WHERE id > " + str(lower_id) + " AND id < " + str(higher_id) + " ORDER BY id " + order + " LIMIT 1") + if query.next(): + return True, int(query.value(0)) + else: + return False, 0 + + def BinarySearchTime(self, lower_id, higher_id, target_time, get_floor): + query = QSqlQuery(self.glb.db) + while True: + next_id = int((lower_id + higher_id) / 2) + QueryExec(query, "SELECT time FROM samples WHERE id = " + str(next_id)) + if not query.next(): + ok, dbid = self.IdBetween(query, lower_id, next_id, "DESC") + if not ok: + ok, dbid = self.IdBetween(query, next_id, higher_id, "") + if not ok: + return str(higher_id) + next_id = dbid + QueryExec(query, "SELECT time FROM samples WHERE id = " + str(next_id)) + next_time = int(query.value(0)) + if get_floor: + if target_time > next_time: + lower_id = next_id + else: + higher_id = next_id + if higher_id <= lower_id + 1: + return str(higher_id) + else: + if target_time >= next_time: + lower_id = next_id + else: + higher_id = next_id + if higher_id <= lower_id + 1: + return str(lower_id) + + def ConvertRelativeTime(self, val): + print "val ", val + mult = 1 + suffix = val[-2:] + if suffix == "ms": + mult = 1000000 + elif suffix == "us": + mult = 1000 + elif suffix == "ns": + mult = 1 + else: + return val + val = val[:-2].strip() + if not self.IsNumber(val): + return val + val = int(val) * mult + if val >= 0: + val += self.first_time + else: + val += self.last_time + return str(val) + + def ConvertTimeRange(self, vrange): + print "vrange ", vrange + if vrange[0] == "": + vrange[0] = str(self.first_time) + if vrange[1] == "": + vrange[1] = str(self.last_time) + vrange[0] = self.ConvertRelativeTime(vrange[0]) + vrange[1] = self.ConvertRelativeTime(vrange[1]) + print "vrange2 ", vrange + if not self.IsNumber(vrange[0]) or not self.IsNumber(vrange[1]): + return False + print "ok1" + beg_range = max(int(vrange[0]), self.first_time) + end_range = min(int(vrange[1]), self.last_time) + if beg_range > self.last_time or end_range < self.first_time: + return False + print "ok2" + vrange[0] = self.BinarySearchTime(0, self.last_id, beg_range, True) + vrange[1] = self.BinarySearchTime(1, self.last_id + 1, end_range, False) + print "vrange3 ", vrange + return True + + def AddTimeRange(self, value, ranges): + print "value ", value + n = value.count("-") + if n == 1: + pass + elif n == 2: + if value.split("-")[1].strip() == "": + n = 1 + elif n == 3: + n = 2 + else: + return False + pos = findnth(value, "-", n) + vrange = [value[:pos].strip() ,value[pos+1:].strip()] + if self.ConvertTimeRange(vrange): + ranges.append(vrange) + return True + return False + + def InvalidValue(self, value): + self.value = "" + palette = QPalette() + palette.setColor(QPalette.Text,Qt.red) + self.widget.setPalette(palette) + self.red = True + self.error = self.label + " invalid value '" + value + "'" + self.parent.ShowMessage(self.error) + + def IsNumber(self, value): + try: + x = int(value) + except: + x = 0 + return str(x) == value + + def Invalidate(self): + self.validated = False + + def Validate(self): + input_string = self.widget.text() + self.validated = True + if self.red: + palette = QPalette() + self.widget.setPalette(palette) + self.red = False + if not len(input_string.strip()): + self.error = "" + self.value = "" + return + if self.table_name == "<timeranges>": + ranges = [] + for value in [x.strip() for x in input_string.split(",")]: + if not self.AddTimeRange(value, ranges): + return self.InvalidValue(value) + ranges = [("(" + self.column_name1 + " >= " + r[0] + " AND " + self.column_name1 + " <= " + r[1] + ")") for r in ranges] + self.value = " OR ".join(ranges) + elif self.table_name == "<ranges>": + singles = [] + ranges = [] + for value in [x.strip() for x in input_string.split(",")]: + if "-" in value: + vrange = value.split("-") + if len(vrange) != 2 or not self.IsNumber(vrange[0]) or not self.IsNumber(vrange[1]): + return self.InvalidValue(value) + ranges.append(vrange) + else: + if not self.IsNumber(value): + return self.InvalidValue(value) + singles.append(value) + ranges = [("(" + self.column_name1 + " >= " + r[0] + " AND " + self.column_name1 + " <= " + r[1] + ")") for r in ranges] + if len(singles): + ranges.append(self.column_name1 + " IN (" + ",".join(singles) + ")") + self.value = " OR ".join(ranges) + elif self.table_name: + all_ids = [] + for value in [x.strip() for x in input_string.split(",")]: + ids = self.ValueToIds(value) + if len(ids): + all_ids.extend(ids) + else: + return self.InvalidValue(value) + self.value = self.column_name1 + " IN (" + ",".join(all_ids) + ")" + if self.column_name2: + self.value = "( " + self.value + " OR " + self.column_name2 + " IN (" + ",".join(all_ids) + ") )" + else: + self.value = input_string.strip() + self.error = "" + self.parent.ClearMessage() + + def IsValid(self): + if not self.validated: + self.Validate() + if len(self.error): + self.parent.ShowMessage(self.error) + return False + return True + +# Selected branch report creation dialog + +class SelectedBranchDialog(QDialog): + + def __init__(self, glb, parent=None): + super(SelectedBranchDialog, self).__init__(parent) + + self.glb = glb + + self.name = "" + self.where_clause = "" + + self.setWindowTitle("Selected Branches") + self.setMinimumWidth(600) + + items = ( + ("Report name:", "Enter a name to appear in the window title bar", "", "", "", ""), + ("Time ranges:", "Enter time ranges", "<timeranges>", "", "samples.id", ""), + ("CPUs:", "Enter CPUs or ranges e.g. 0,5-6", "<ranges>", "", "cpu", ""), + ("Commands:", "Only branches with these commands will be included", "comms", "comm", "comm_id", ""), + ("PIDs:", "Only branches with these process IDs will be included", "threads", "pid", "thread_id", ""), + ("TIDs:", "Only branches with these thread IDs will be included", "threads", "tid", "thread_id", ""), + ("DSOs:", "Only branches with these DSOs will be included", "dsos", "short_name", "samples.dso_id", "to_dso_id"), + ("Symbols:", "Only branches with these symbols will be included", "symbols", "name", "symbol_id", "to_symbol_id"), + ("Raw SQL clause: ", "Enter a raw SQL WHERE clause", "", "", "", ""), + ) + self.data_items = [SQLTableDialogDataItem(glb, *x, parent=self) for x in items] + + self.grid = QGridLayout() + + for row in xrange(len(self.data_items)): + self.grid.addWidget(QLabel(self.data_items[row].label), row, 0) + self.grid.addWidget(self.data_items[row].widget, row, 1) + + self.status = QLabel() + + self.ok_button = QPushButton("Ok", self) + self.ok_button.setDefault(True) + self.ok_button.released.connect(self.Ok) + self.ok_button.setSizePolicy(QSizePolicy.Fixed, QSizePolicy.Fixed) + + self.cancel_button = QPushButton("Cancel", self) + self.cancel_button.released.connect(self.reject) + self.cancel_button.setSizePolicy(QSizePolicy.Fixed, QSizePolicy.Fixed) + + self.hbox = QHBoxLayout() + #self.hbox.addStretch() + self.hbox.addWidget(self.status) + self.hbox.addWidget(self.ok_button) + self.hbox.addWidget(self.cancel_button) + + self.vbox = QVBoxLayout() + self.vbox.addLayout(self.grid) + self.vbox.addLayout(self.hbox) + + self.setLayout(self.vbox); + + def Ok(self): + self.name = self.data_items[0].value + if not self.name: + self.ShowMessage("Report name is required") + return + for d in self.data_items: + if not d.IsValid(): + return + for d in self.data_items[1:]: + if len(d.value): + if len(self.where_clause): + self.where_clause += " AND " + self.where_clause += d.value + if len(self.where_clause): + self.where_clause = " AND ( " + self.where_clause + " ) " + else: + self.ShowMessage("No selection") + return + self.accept() + + def ShowMessage(self, msg): + self.status.setText("<font color=#FF0000>" + msg) + + def ClearMessage(self): + self.status.setText("") + # Event list def GetEventList(db): @@ -1888,6 +2207,8 @@ def EventMenu(self, events, reports_menu): if event == "branches": label = "All branches" if branches_events == 1 else "All branches " + "(id=" + dbid + ")" reports_menu.addAction(CreateAction(label, "Create a new window displaying branch events", lambda x=dbid: self.NewBranchView(x), self)) + label = "Selected branches" if branches_events == 1 else "Selected branches " + "(id=" + dbid + ")" + reports_menu.addAction(CreateAction(label, "Create a new window displaying branch events", lambda x=dbid: self.NewSelectedBranchView(x), self)) def TableMenu(self, tables, menu): table_menu = menu.addMenu("&Tables") @@ -1900,6 +2221,12 @@ def NewCallGraph(self): def NewBranchView(self, event_id): BranchWindow(self.glb, event_id, "", "", self) + def NewSelectedBranchView(self, event_id): + dialog = SelectedBranchDialog(self.glb, self) + ret = dialog.exec_() + if ret: + BranchWindow(self.glb, event_id, dialog.name, dialog.where_clause, self) + def NewTableView(self, table_name): TableWindow(self.glb, table_name, self) From 65b24292e8f34df22a16bf7c47823325ac247572 Mon Sep 17 00:00:00 2001 From: Adrian Hunter <adrian.hunter@intel.com> Date: Sun, 4 Nov 2018 17:12:37 +0200 Subject: [PATCH 121/443] perf scripts python: exported-sql-viewer.py: Add help window Add a window to display help. It is also possible to display the help only, by using the option "--help-only" instead of a database name. Signed-off-by: Adrian Hunter <adrian.hunter@intel.com> Cc: Andi Kleen <ak@linux.intel.com> Cc: Jiri Olsa <jolsa@redhat.com> Link: http://lkml.kernel.org/r/20181104151238.15947-4-adrian.hunter@intel.com Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com> --- .../scripts/python/exported-sql-viewer.py | 155 +++++++++++++++++- 1 file changed, 154 insertions(+), 1 deletion(-) diff --git a/tools/perf/scripts/python/exported-sql-viewer.py b/tools/perf/scripts/python/exported-sql-viewer.py index a9d2b3170141f..c2fcf6c5237ab 100755 --- a/tools/perf/scripts/python/exported-sql-viewer.py +++ b/tools/perf/scripts/python/exported-sql-viewer.py @@ -2084,6 +2084,147 @@ def Update(self): def setActiveSubWindow(self, nr): self.mdi_area.setActiveSubWindow(self.mdi_area.subWindowList()[nr - 1]) +# Help text + +glb_help_text = """ +<h1>Contents</h1> +<style> +p.c1 { + text-indent: 40px; +} +p.c2 { + text-indent: 80px; +} +} +</style> +<p class=c1><a href=#reports>1. Reports</a></p> +<p class=c2><a href=#callgraph>1.1 Context-Sensitive Call Graph</a></p> +<p class=c2><a href=#allbranches>1.2 All branches</a></p> +<p class=c2><a href=#selectedbranches>1.3 Selected branches</a></p> +<p class=c1><a href=#tables>2. Tables</a></p> +<h1 id=reports>1. Reports</h1> +<h2 id=callgraph>1.1 Context-Sensitive Call Graph</h2> +The result is a GUI window with a tree representing a context-sensitive +call-graph. Expanding a couple of levels of the tree and adjusting column +widths to suit will display something like: +<pre> + Call Graph: pt_example +Call Path Object Count Time(ns) Time(%) Branch Count Branch Count(%) +v- ls + v- 2638:2638 + v- _start ld-2.19.so 1 10074071 100.0 211135 100.0 + |- unknown unknown 1 13198 0.1 1 0.0 + >- _dl_start ld-2.19.so 1 1400980 13.9 19637 9.3 + >- _d_linit_internal ld-2.19.so 1 448152 4.4 11094 5.3 + v-__libc_start_main@plt ls 1 8211741 81.5 180397 85.4 + >- _dl_fixup ld-2.19.so 1 7607 0.1 108 0.1 + >- __cxa_atexit libc-2.19.so 1 11737 0.1 10 0.0 + >- __libc_csu_init ls 1 10354 0.1 10 0.0 + |- _setjmp libc-2.19.so 1 0 0.0 4 0.0 + v- main ls 1 8182043 99.6 180254 99.9 +</pre> +<h3>Points to note:</h3> +<ul> +<li>The top level is a command name (comm)</li> +<li>The next level is a thread (pid:tid)</li> +<li>Subsequent levels are functions</li> +<li>'Count' is the number of calls</li> +<li>'Time' is the elapsed time until the function returns</li> +<li>Percentages are relative to the level above</li> +<li>'Branch Count' is the total number of branches for that function and all functions that it calls +</ul> +<h3>Find</h3> +Ctrl-F displays a Find bar which finds function names by either an exact match or a pattern match. +The pattern matching symbols are ? for any character and * for zero or more characters. +<h2 id=allbranches>1.2 All branches</h2> +The All branches report displays all branches in chronological order. +Not all data is fetched immediately. More records can be fetched using the Fetch bar provided. +<h3>Disassembly</h3> +Open a branch to display disassembly. This only works if: +<ol> +<li>The disassembler is available. Currently, only Intel XED is supported - see <a href=#xed>Intel XED Setup</a></li> +<li>The object code is available. Currently, only the perf build ID cache is searched for object code. +The default directory ~/.debug can be overridden by setting environment variable PERF_BUILDID_DIR. +One exception is kcore where the DSO long name is used (refer dsos_view on the Tables menu), +or alternatively, set environment variable PERF_KCORE to the kcore file name.</li> +</ol> +<h4 id=xed>Intel XED Setup</h4> +To use Intel XED, libxed.so must be present. To build and install libxed.so: +<pre> +git clone https://github.com/intelxed/mbuild.git mbuild +git clone https://github.com/intelxed/xed +cd xed +./mfile.py --share +sudo ./mfile.py --prefix=/usr/local install +sudo ldconfig +</pre> +<h3>Find</h3> +Ctrl-F displays a Find bar which finds substrings by either an exact match or a regular expression match. +Refer to Python documentation for the regular expression syntax. +All columns are searched, but only currently fetched rows are searched. +<h2 id=selectedbranches>1.3 Selected branches</h2> +This is the same as the <a href=#allbranches>All branches</a> report but with the data reduced +by various selection criteria. A dialog box displays available criteria which are AND'ed together. +<h3>1.3.1 Time ranges</h3> +The time ranges hint text shows the total time range. Relative time ranges can also be entered in +ms, us or ns. Also, negative values are relative to the end of trace. Examples: +<pre> + 81073085947329-81073085958238 From 81073085947329 to 81073085958238 + 100us-200us From 100us to 200us + 10ms- From 10ms to the end + -100ns The first 100ns + -10ms- The last 10ms +</pre> +N.B. Due to the granularity of timestamps, there could be no branches in any given time range. +<h1 id=tables>2. Tables</h1> +The Tables menu shows all tables and views in the database. Most tables have an associated view +which displays the information in a more friendly way. Not all data for large tables is fetched +immediately. More records can be fetched using the Fetch bar provided. Columns can be sorted, +but that can be slow for large tables. +<p>There are also tables of database meta-information. +For SQLite3 databases, the sqlite_master table is included. +For PostgreSQL databases, information_schema.tables/views/columns are included. +<h3>Find</h3> +Ctrl-F displays a Find bar which finds substrings by either an exact match or a regular expression match. +Refer to Python documentation for the regular expression syntax. +All columns are searched, but only currently fetched rows are searched. +""" + +# Help window + +class HelpWindow(QMdiSubWindow): + + def __init__(self, glb, parent=None): + super(HelpWindow, self).__init__(parent) + + self.text = QTextBrowser() + self.text.setHtml(glb_help_text) + self.text.setReadOnly(True) + self.text.setOpenExternalLinks(True) + + self.setWidget(self.text) + + AddSubWindow(glb.mainwindow.mdi_area, self, "Exported SQL Viewer Help") + +# Main window that only displays the help text + +class HelpOnlyWindow(QMainWindow): + + def __init__(self, parent=None): + super(HelpOnlyWindow, self).__init__(parent) + + self.setMinimumSize(200, 100) + self.resize(800, 600) + self.setWindowTitle("Exported SQL Viewer Help") + self.setWindowIcon(self.style().standardIcon(QStyle.SP_MessageBoxInformation)) + + self.text = QTextBrowser() + self.text.setHtml(glb_help_text) + self.text.setReadOnly(True) + self.text.setOpenExternalLinks(True) + + self.setCentralWidget(self.text) + # Font resize def ResizeFont(widget, diff): @@ -2170,6 +2311,9 @@ def __init__(self, glb, parent=None): self.window_menu = WindowMenu(self.mdi_area, menu) + help_menu = menu.addMenu("&Help") + help_menu.addAction(CreateAction("&Exported SQL Viewer Help", "Helpful information", self.Help, self, QKeySequence.HelpContents)) + def Find(self): win = self.mdi_area.activeSubWindow() if win: @@ -2230,6 +2374,9 @@ def NewSelectedBranchView(self, event_id): def NewTableView(self, table_name): TableWindow(self.glb, table_name, self) + def Help(self): + HelpWindow(self.glb, self) + # XED Disassembler class xed_state_t(Structure): @@ -2429,10 +2576,16 @@ def Open(self, connection_name): def Main(): if (len(sys.argv) < 2): - print >> sys.stderr, "Usage is: exported-sql-viewer.py <database name>" + print >> sys.stderr, "Usage is: exported-sql-viewer.py {<database name> | --help-only}" raise Exception("Too few arguments") dbname = sys.argv[1] + if dbname == "--help-only": + app = QApplication(sys.argv) + mainwindow = HelpOnlyWindow() + mainwindow.show() + err = app.exec_() + sys.exit(err) is_sqlite3 = False try: From 35fa1cee21e34f43db928d022610707d5a234faf Mon Sep 17 00:00:00 2001 From: Adrian Hunter <adrian.hunter@intel.com> Date: Sun, 4 Nov 2018 17:12:38 +0200 Subject: [PATCH 122/443] perf scripts python: exported-sql-viewer.py: Fix table find when table re-ordered Table rows can be re-ordered by selecting a column to sort by. After re-ordering, the "find" operation was highlighting the wrong row, fix it. Signed-off-by: Adrian Hunter <adrian.hunter@intel.com> Cc: Andi Kleen <ak@linux.intel.com> Cc: Jiri Olsa <jolsa@redhat.com> Link: http://lkml.kernel.org/r/20181104151238.15947-5-adrian.hunter@intel.com Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com> --- tools/perf/scripts/python/exported-sql-viewer.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/tools/perf/scripts/python/exported-sql-viewer.py b/tools/perf/scripts/python/exported-sql-viewer.py index c2fcf6c5237ab..f278ce5ebab76 100755 --- a/tools/perf/scripts/python/exported-sql-viewer.py +++ b/tools/perf/scripts/python/exported-sql-viewer.py @@ -1975,7 +1975,7 @@ def Find(self, value, direction, pattern, context): def FindDone(self, row): self.find_bar.Idle() if row >= 0: - self.view.setCurrentIndex(self.model.index(row, 0, QModelIndex())) + self.view.setCurrentIndex(self.model.mapFromSource(self.data_model.index(row, 0, QModelIndex()))) else: self.find_bar.NotFound() @@ -2188,6 +2188,8 @@ def setActiveSubWindow(self, nr): Ctrl-F displays a Find bar which finds substrings by either an exact match or a regular expression match. Refer to Python documentation for the regular expression syntax. All columns are searched, but only currently fetched rows are searched. +<p>N.B. Results are found in id order, so if the table is re-ordered, find-next and find-previous +will go to the next/previous result in id order, instead of display order. """ # Help window From 93f8be2799515e01647c5a9b0d17a90a00ebcf82 Mon Sep 17 00:00:00 2001 From: Adrian Hunter <adrian.hunter@intel.com> Date: Mon, 5 Nov 2018 09:35:04 +0200 Subject: [PATCH 123/443] perf intel-pt: Add more event information to debug log More event information is useful for debugging, especially MMAP events. Signed-off-by: Adrian Hunter <adrian.hunter@intel.com> Link: http://lkml.kernel.org/r/20181105073505.8129-2-adrian.hunter@intel.com Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com> --- tools/perf/util/intel-pt-decoder/intel-pt-log.c | 5 +++++ tools/perf/util/intel-pt-decoder/intel-pt-log.h | 1 + tools/perf/util/intel-pt.c | 16 +++++++++++++--- 3 files changed, 19 insertions(+), 3 deletions(-) diff --git a/tools/perf/util/intel-pt-decoder/intel-pt-log.c b/tools/perf/util/intel-pt-decoder/intel-pt-log.c index e02bc7b166a0e..5e64da270f976 100644 --- a/tools/perf/util/intel-pt-decoder/intel-pt-log.c +++ b/tools/perf/util/intel-pt-decoder/intel-pt-log.c @@ -31,6 +31,11 @@ static FILE *f; static char log_name[MAX_LOG_NAME]; bool intel_pt_enable_logging; +void *intel_pt_log_fp(void) +{ + return f; +} + void intel_pt_log_enable(void) { intel_pt_enable_logging = true; diff --git a/tools/perf/util/intel-pt-decoder/intel-pt-log.h b/tools/perf/util/intel-pt-decoder/intel-pt-log.h index 45b64f93f3588..cc084937f701a 100644 --- a/tools/perf/util/intel-pt-decoder/intel-pt-log.h +++ b/tools/perf/util/intel-pt-decoder/intel-pt-log.h @@ -22,6 +22,7 @@ struct intel_pt_pkt; +void *intel_pt_log_fp(void); void intel_pt_log_enable(void); void intel_pt_log_disable(void); void intel_pt_log_set_name(const char *name); diff --git a/tools/perf/util/intel-pt.c b/tools/perf/util/intel-pt.c index 86cc9a64e9827..149ff361ca789 100644 --- a/tools/perf/util/intel-pt.c +++ b/tools/perf/util/intel-pt.c @@ -206,6 +206,16 @@ static void intel_pt_dump_event(struct intel_pt *pt, unsigned char *buf, intel_pt_dump(pt, buf, len); } +static void intel_pt_log_event(union perf_event *event) +{ + FILE *f = intel_pt_log_fp(); + + if (!intel_pt_enable_logging || !f) + return; + + perf_event__fprintf(event, f); +} + static int intel_pt_do_fix_overlap(struct intel_pt *pt, struct auxtrace_buffer *a, struct auxtrace_buffer *b) { @@ -2010,9 +2020,9 @@ static int intel_pt_process_event(struct perf_session *session, event->header.type == PERF_RECORD_SWITCH_CPU_WIDE) err = intel_pt_context_switch(pt, event, sample); - intel_pt_log("event %s (%u): cpu %d time %"PRIu64" tsc %#"PRIx64"\n", - perf_event__name(event->header.type), event->header.type, - sample->cpu, sample->time, timestamp); + intel_pt_log("event %u: cpu %d time %"PRIu64" tsc %#"PRIx64" ", + event->header.type, sample->cpu, sample->time, timestamp); + intel_pt_log_event(event); return err; } From f6c23e3b55cb93f32a724f41af8d38888bc2ab6b Mon Sep 17 00:00:00 2001 From: Adrian Hunter <adrian.hunter@intel.com> Date: Mon, 5 Nov 2018 09:35:05 +0200 Subject: [PATCH 124/443] perf intel-pt: Add MTC and CYC timestamps to debug log One cause of decoding errors is un-synchronized side-band data. Timestamps are needed to debug such cases. TSC packet timestamps are logged. Log also MTC and CYC timestamps. Signed-off-by: Adrian Hunter <adrian.hunter@intel.com> Link: http://lkml.kernel.org/r/20181105073505.8129-3-adrian.hunter@intel.com Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com> --- tools/perf/util/intel-pt-decoder/intel-pt-decoder.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c b/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c index 58f6a9ceb5909..4503f3ca45ab4 100644 --- a/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c +++ b/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c @@ -1474,6 +1474,8 @@ static void intel_pt_calc_mtc_timestamp(struct intel_pt_decoder *decoder) decoder->have_calc_cyc_to_tsc = false; intel_pt_calc_cyc_to_tsc(decoder, true); } + + intel_pt_log_to("Setting timestamp", decoder->timestamp); } static void intel_pt_calc_cbr(struct intel_pt_decoder *decoder) @@ -1514,6 +1516,8 @@ static void intel_pt_calc_cyc_timestamp(struct intel_pt_decoder *decoder) decoder->timestamp = timestamp; decoder->timestamp_insn_cnt = 0; + + intel_pt_log_to("Setting timestamp", decoder->timestamp); } /* Walk PSB+ packets when already in sync. */ From c0fae7e2452b90c31edd2d25eb3baf0c76b400ca Mon Sep 17 00:00:00 2001 From: Aaro Koskinen <aaro.koskinen@iki.fi> Date: Sat, 27 Oct 2018 01:46:34 +0300 Subject: [PATCH 125/443] MIPS: OCTEON: fix out of bounds array access on CN68XX The maximum number of interfaces is returned by cvmx_helper_get_number_of_interfaces(), and the value is used to access interface_port_count[]. When CN68XX support was added, we forgot to increase the array size. Fix that. Fixes: 2c8c3f0201333 ("MIPS: Octeon: Support additional interfaces on CN68XX") Signed-off-by: Aaro Koskinen <aaro.koskinen@iki.fi> Signed-off-by: Paul Burton <paul.burton@mips.com> Patchwork: https://patchwork.linux-mips.org/patch/20949/ Cc: Ralf Baechle <ralf@linux-mips.org> Cc: linux-mips@linux-mips.org Cc: linux-kernel@vger.kernel.org Cc: stable@vger.kernel.org # v4.3+ --- arch/mips/cavium-octeon/executive/cvmx-helper.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/mips/cavium-octeon/executive/cvmx-helper.c b/arch/mips/cavium-octeon/executive/cvmx-helper.c index 75108ec669ebc..6c79e8a16a268 100644 --- a/arch/mips/cavium-octeon/executive/cvmx-helper.c +++ b/arch/mips/cavium-octeon/executive/cvmx-helper.c @@ -67,7 +67,7 @@ void (*cvmx_override_pko_queue_priority) (int pko_port, void (*cvmx_override_ipd_port_setup) (int ipd_port); /* Port count per interface */ -static int interface_port_count[5]; +static int interface_port_count[9]; /** * Return the number of interfaces the chip has. Each interface From d01501f85249848a2497968d46dd46d5c6fe32e6 Mon Sep 17 00:00:00 2001 From: "Maciej W. Rozycki" <macro@linux-mips.org> Date: Thu, 1 Nov 2018 07:54:24 +0000 Subject: [PATCH 126/443] MIPS: Fix `dma_alloc_coherent' returning a non-coherent allocation Fix a MIPS `dma_alloc_coherent' regression from commit bc3ec75de545 ("dma-mapping: merge direct and noncoherent ops") that causes a cached allocation to be returned on noncoherent cache systems. This is due to an inverted check now used in the MIPS implementation of `arch_dma_alloc' on the result from `dma_direct_alloc_pages' before doing the cached-to-uncached mapping of the allocation address obtained. The mapping has to be done for a non-NULL rather than NULL result, because a NULL result means the allocation has failed. Invert the check for correct operation then. Signed-off-by: Maciej W. Rozycki <macro@linux-mips.org> Signed-off-by: Paul Burton <paul.burton@mips.com> Reviewed-by: Christoph Hellwig <hch@lst.de> Fixes: bc3ec75de545 ("dma-mapping: merge direct and noncoherent ops") Patchwork: https://patchwork.linux-mips.org/patch/20965/ --- arch/mips/mm/dma-noncoherent.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/mips/mm/dma-noncoherent.c b/arch/mips/mm/dma-noncoherent.c index e6c9485cadcff..cb38461391cb7 100644 --- a/arch/mips/mm/dma-noncoherent.c +++ b/arch/mips/mm/dma-noncoherent.c @@ -50,7 +50,7 @@ void *arch_dma_alloc(struct device *dev, size_t size, void *ret; ret = dma_direct_alloc_pages(dev, size, dma_handle, gfp, attrs); - if (!ret && !(attrs & DMA_ATTR_NON_CONSISTENT)) { + if (ret && !(attrs & DMA_ATTR_NON_CONSISTENT)) { dma_cache_wback_inv((unsigned long) ret, size); ret = (void *)UNCAC_ADDR(ret); } From e2c39f36c354a06c6e9d32d4fdf8660b41803d82 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo <acme@redhat.com> Date: Mon, 5 Nov 2018 15:46:51 -0300 Subject: [PATCH 127/443] perf beauty: Use SRCARCH, ARCH=x86_64 must map to "x86" to find the headers Guenter reported that using ARCH=x86_64 to build perf has regressed: $ make -C tools/perf O=/tmp/build/perf ARCH=x86_64 make: Entering directory '/home/acme/git/perf/tools/perf' BUILD: Doing 'make -j4' parallel build HOSTCC /tmp/build/perf/fixdep.o HOSTLD /tmp/build/perf/fixdep-in.o LINK /tmp/build/perf/fixdep Auto-detecting system features: ... dwarf: [ on ] <SNIP> ... bpf: [ on ] GEN /tmp/build/perf/common-cmds.h make[2]: *** No rule to make target '/home/acme/git/perf/tools/arch/x86_64/include/uapi/asm//mman.h', needed by '/tmp/build/perf/trace/beauty/generated/mmap_flags_array.c'. Stop. make[2]: *** Waiting for unfinished jobs.... PERF_VERSION = 4.19.gf6c23e3 make[1]: *** [Makefile.perf:207: sub-make] Error 2 make: *** [Makefile:70: all] Error 2 make: Leaving directory '/home/acme/git/perf/tools/perf' $ This is because we must use $(SRCARCH) where we were using $(ARCH), so that, just like the top level Makefile, we get this done: # Additional ARCH settings for x86 ifeq ($(ARCH),i386) SRCARCH := x86 endif ifeq ($(ARCH),x86_64) SRCARCH := x86 endif Which is done in tools/scripts/Makefile.arch, so switch to use $(SRCARCH). Reported-by: Guenter Roeck <linux@roeck-us.net> Cc: Adrian Hunter <adrian.hunter@intel.com> Cc: Clark Williams <williams@redhat.com> Cc: David Ahern <dsahern@gmail.com> Cc: Jiri Olsa <jolsa@kernel.org> Cc: Namhyung Kim <namhyung@kernel.org> Cc: Wang Nan <wangnan0@huawei.com> Fixes: fbd7458db757 ("perf beauty: Wire up the mmap flags table generator to the Makefile") Link: https://lkml.kernel.org/r/20181105184612.GD7077@kernel.org Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com> --- tools/perf/Makefile.perf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/perf/Makefile.perf b/tools/perf/Makefile.perf index 3ccb4f0bf0883..d95655489f7e1 100644 --- a/tools/perf/Makefile.perf +++ b/tools/perf/Makefile.perf @@ -387,7 +387,7 @@ SHELL = $(SHELL_PATH) linux_uapi_dir := $(srctree)/tools/include/uapi/linux asm_generic_uapi_dir := $(srctree)/tools/include/uapi/asm-generic -arch_asm_uapi_dir := $(srctree)/tools/arch/$(ARCH)/include/uapi/asm/ +arch_asm_uapi_dir := $(srctree)/tools/arch/$(SRCARCH)/include/uapi/asm/ beauty_outdir := $(OUTPUT)trace/beauty/generated beauty_ioctl_outdir := $(beauty_outdir)/ioctl From fd5ba6ee3187617287fb9cb187e3d6b3631210a3 Mon Sep 17 00:00:00 2001 From: Aaro Koskinen <aaro.koskinen@nokia.com> Date: Fri, 2 Nov 2018 21:10:48 +0200 Subject: [PATCH 128/443] arm64: dts: stratix10: fix multicast filtering On Stratix 10, the EMAC has 256 hash buckets for multicast filtering. This needs to be specified in DTS, otherwise the stmmac driver defaults to 64 buckets and initializes the filter incorrectly. As a result, e.g. valid IPv6 multicast traffic ends up being dropped. Fixes: 78cd6a9d8e15 ("arm64: dts: Add base stratix 10 dtsi") Cc: stable@vger.kernel.org Signed-off-by: Aaro Koskinen <aaro.koskinen@nokia.com> Signed-off-by: Dinh Nguyen <dinguyen@kernel.org> --- arch/arm64/boot/dts/altera/socfpga_stratix10.dtsi | 3 +++ 1 file changed, 3 insertions(+) diff --git a/arch/arm64/boot/dts/altera/socfpga_stratix10.dtsi b/arch/arm64/boot/dts/altera/socfpga_stratix10.dtsi index 8253a1a9e9857..fef7351e9f677 100644 --- a/arch/arm64/boot/dts/altera/socfpga_stratix10.dtsi +++ b/arch/arm64/boot/dts/altera/socfpga_stratix10.dtsi @@ -139,6 +139,7 @@ clock-names = "stmmaceth"; tx-fifo-depth = <16384>; rx-fifo-depth = <16384>; + snps,multicast-filter-bins = <256>; status = "disabled"; }; @@ -154,6 +155,7 @@ clock-names = "stmmaceth"; tx-fifo-depth = <16384>; rx-fifo-depth = <16384>; + snps,multicast-filter-bins = <256>; status = "disabled"; }; @@ -169,6 +171,7 @@ clock-names = "stmmaceth"; tx-fifo-depth = <16384>; rx-fifo-depth = <16384>; + snps,multicast-filter-bins = <256>; status = "disabled"; }; From 6ac2226229d931153331a93d90655a3de05b9290 Mon Sep 17 00:00:00 2001 From: Gustavo Romero <gromero@linux.vnet.ibm.com> Date: Thu, 1 Nov 2018 20:13:21 -0400 Subject: [PATCH 129/443] perf tools: Fix undefined symbol scnprintf in libperf-jvmti.so Currently jvmti agent can not be used because function scnprintf is not present in the agent libperf-jvmti.so. As a result the JVM when using such agent to record JITed code profiling information will fail on looking up scnprintf: java: symbol lookup error: lib/libperf-jvmti.so: undefined symbol: scnprintf This commit fixes that by reverting to the use of snprintf, that can be looked up, instead of scnprintf, adding a proper check for the returned value in order to print a better error message when the jitdump file pathname is too long. Checking the returned value also helps to comply with some recent gcc versions, like gcc8, which will fail due to truncated writing checks related to the -Werror=format-truncation= flag. Signed-off-by: Gustavo Romero <gromero@linux.vnet.ibm.com> Acked-by: Jiri Olsa <jolsa@kernel.org> LPU-Reference: 1541117601-18937-2-git-send-email-gromero@linux.vnet.ibm.com Link: https://lkml.kernel.org/n/tip-mvpxxxy7wnzaj74cq75muw3f@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com> --- tools/perf/jvmti/jvmti_agent.c | 49 ++++++++++++++++++++++++++-------- 1 file changed, 38 insertions(+), 11 deletions(-) diff --git a/tools/perf/jvmti/jvmti_agent.c b/tools/perf/jvmti/jvmti_agent.c index ac1bcdc17dae7..f7eb63cbbc655 100644 --- a/tools/perf/jvmti/jvmti_agent.c +++ b/tools/perf/jvmti/jvmti_agent.c @@ -125,7 +125,7 @@ perf_get_timestamp(void) } static int -debug_cache_init(void) +create_jit_cache_dir(void) { char str[32]; char *base, *p; @@ -144,8 +144,13 @@ debug_cache_init(void) strftime(str, sizeof(str), JIT_LANG"-jit-%Y%m%d", &tm); - snprintf(jit_path, PATH_MAX - 1, "%s/.debug/", base); - + ret = snprintf(jit_path, PATH_MAX, "%s/.debug/", base); + if (ret >= PATH_MAX) { + warnx("jvmti: cannot generate jit cache dir because %s/.debug/" + " is too long, please check the cwd, JITDUMPDIR, and" + " HOME variables", base); + return -1; + } ret = mkdir(jit_path, 0755); if (ret == -1) { if (errno != EEXIST) { @@ -154,20 +159,32 @@ debug_cache_init(void) } } - snprintf(jit_path, PATH_MAX - 1, "%s/.debug/jit", base); + ret = snprintf(jit_path, PATH_MAX, "%s/.debug/jit", base); + if (ret >= PATH_MAX) { + warnx("jvmti: cannot generate jit cache dir because" + " %s/.debug/jit is too long, please check the cwd," + " JITDUMPDIR, and HOME variables", base); + return -1; + } ret = mkdir(jit_path, 0755); if (ret == -1) { if (errno != EEXIST) { - warn("cannot create jit cache dir %s", jit_path); + warn("jvmti: cannot create jit cache dir %s", jit_path); return -1; } } - snprintf(jit_path, PATH_MAX - 1, "%s/.debug/jit/%s.XXXXXXXX", base, str); - + ret = snprintf(jit_path, PATH_MAX, "%s/.debug/jit/%s.XXXXXXXX", base, str); + if (ret >= PATH_MAX) { + warnx("jvmti: cannot generate jit cache dir because" + " %s/.debug/jit/%s.XXXXXXXX is too long, please check" + " the cwd, JITDUMPDIR, and HOME variables", + base, str); + return -1; + } p = mkdtemp(jit_path); if (p != jit_path) { - warn("cannot create jit cache dir %s", jit_path); + warn("jvmti: cannot create jit cache dir %s", jit_path); return -1; } @@ -228,7 +245,7 @@ void *jvmti_open(void) { char dump_path[PATH_MAX]; struct jitheader header; - int fd; + int fd, ret; FILE *fp; init_arch_timestamp(); @@ -245,12 +262,22 @@ void *jvmti_open(void) memset(&header, 0, sizeof(header)); - debug_cache_init(); + /* + * jitdump file dir + */ + if (create_jit_cache_dir() < 0) + return NULL; /* * jitdump file name */ - scnprintf(dump_path, PATH_MAX, "%s/jit-%i.dump", jit_path, getpid()); + ret = snprintf(dump_path, PATH_MAX, "%s/jit-%i.dump", jit_path, getpid()); + if (ret >= PATH_MAX) { + warnx("jvmti: cannot generate jitdump file full path because" + " %s/jit-%i.dump is too long, please check the cwd," + " JITDUMPDIR, and HOME variables", jit_path, getpid()); + return NULL; + } fd = open(dump_path, O_CREAT|O_TRUNC|O_RDWR, 0666); if (fd == -1) From af31b04b67f4fd7f639fd465a507c154c46fc9fb Mon Sep 17 00:00:00 2001 From: Masayoshi Mizuma <m.mizuma@jp.fujitsu.com> Date: Tue, 30 Oct 2018 21:50:25 -0400 Subject: [PATCH 130/443] tools/testing/nvdimm: Fix the array size for dimm devices. KASAN reports following global out of bounds access while nfit_test is being loaded. The out of bound access happens the following reference to dimm_fail_cmd_flags[dimm]. 'dimm' is over than the index value, NUM_DCR (==5). static int override_return_code(int dimm, unsigned int func, int rc) { if ((1 << func) & dimm_fail_cmd_flags[dimm]) { dimm_fail_cmd_flags[] definition: static unsigned long dimm_fail_cmd_flags[NUM_DCR]; 'dimm' is the return value of get_dimm(), and get_dimm() returns the index of handle[] array. The handle[] has 7 index. Let's use ARRAY_SIZE(handle) as the array size. KASAN report: ================================================================== BUG: KASAN: global-out-of-bounds in nfit_test_ctl+0x47bb/0x55b0 [nfit_test] Read of size 8 at addr ffffffffc10cbbe8 by task kworker/u41:0/8 ... Call Trace: dump_stack+0xea/0x1b0 ? dump_stack_print_info.cold.0+0x1b/0x1b ? kmsg_dump_rewind_nolock+0xd9/0xd9 print_address_description+0x65/0x22e ? nfit_test_ctl+0x47bb/0x55b0 [nfit_test] kasan_report.cold.6+0x92/0x1a6 nfit_test_ctl+0x47bb/0x55b0 [nfit_test] ... The buggy address belongs to the variable: dimm_fail_cmd_flags+0x28/0xffffffffffffa440 [nfit_test] ================================================================== Fixes: 39611e83a28c ("tools/testing/nvdimm: Make DSM failure code injection...") Signed-off-by: Masayoshi Mizuma <m.mizuma@jp.fujitsu.com> Signed-off-by: Dan Williams <dan.j.williams@intel.com> --- tools/testing/nvdimm/test/nfit.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tools/testing/nvdimm/test/nfit.c b/tools/testing/nvdimm/test/nfit.c index 9527d47a1070e..01ec04bf91b59 100644 --- a/tools/testing/nvdimm/test/nfit.c +++ b/tools/testing/nvdimm/test/nfit.c @@ -140,8 +140,8 @@ static u32 handle[] = { [6] = NFIT_DIMM_HANDLE(1, 0, 0, 0, 1), }; -static unsigned long dimm_fail_cmd_flags[NUM_DCR]; -static int dimm_fail_cmd_code[NUM_DCR]; +static unsigned long dimm_fail_cmd_flags[ARRAY_SIZE(handle)]; +static int dimm_fail_cmd_code[ARRAY_SIZE(handle)]; static const struct nd_intel_smart smart_def = { .flags = ND_INTEL_SMART_HEALTH_VALID @@ -205,7 +205,7 @@ struct nfit_test { unsigned long deadline; spinlock_t lock; } ars_state; - struct device *dimm_dev[NUM_DCR]; + struct device *dimm_dev[ARRAY_SIZE(handle)]; struct nd_intel_smart *smart; struct nd_intel_smart_threshold *smart_threshold; struct badrange badrange; @@ -2680,7 +2680,7 @@ static int nfit_test_probe(struct platform_device *pdev) u32 nfit_handle = __to_nfit_memdev(nfit_mem)->device_handle; int i; - for (i = 0; i < NUM_DCR; i++) + for (i = 0; i < ARRAY_SIZE(handle); i++) if (nfit_handle == handle[i]) dev_set_drvdata(nfit_test->dimm_dev[i], nfit_mem); From aba118389a6fb2ad7958de0f37b5869852bd38cf Mon Sep 17 00:00:00 2001 From: "Dmitry V. Levin" <ldv@altlinux.org> Date: Thu, 1 Nov 2018 14:03:08 +0300 Subject: [PATCH 131/443] uapi: fix linux/kfd_ioctl.h userspace compilation errors Consistently use types provided by <linux/types.h> via <drm/drm.h> to fix the following linux/kfd_ioctl.h userspace compilation errors: /usr/include/linux/kfd_ioctl.h:250:2: error: unknown type name 'uint32_t' uint32_t reset_type; /usr/include/linux/kfd_ioctl.h:251:2: error: unknown type name 'uint32_t' uint32_t reset_cause; /usr/include/linux/kfd_ioctl.h:252:2: error: unknown type name 'uint32_t' uint32_t memory_lost; /usr/include/linux/kfd_ioctl.h:253:2: error: unknown type name 'uint32_t' uint32_t gpu_id; Fixes: 0c119abad7f0d ("drm/amd: Add kfd ioctl defines for hw_exception event") Cc: <stable@vger.kernel.org> # v4.19 Signed-off-by: Dmitry V. Levin <ldv@altlinux.org> Reviewed-by: Felix Kuehling <Felix.Kuehling@amd.com> Signed-off-by: Alex Deucher <alexander.deucher@amd.com> --- include/uapi/linux/kfd_ioctl.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/include/uapi/linux/kfd_ioctl.h b/include/uapi/linux/kfd_ioctl.h index f5ff8a76e208f..dae897f38e593 100644 --- a/include/uapi/linux/kfd_ioctl.h +++ b/include/uapi/linux/kfd_ioctl.h @@ -255,10 +255,10 @@ struct kfd_hsa_memory_exception_data { /* hw exception data */ struct kfd_hsa_hw_exception_data { - uint32_t reset_type; - uint32_t reset_cause; - uint32_t memory_lost; - uint32_t gpu_id; + __u32 reset_type; + __u32 reset_cause; + __u32 memory_lost; + __u32 gpu_id; }; /* Event data */ From 8e7f91719db36440d63de37331367be9700ca0c7 Mon Sep 17 00:00:00 2001 From: "Dmitry V. Levin" <ldv@altlinux.org> Date: Thu, 1 Nov 2018 14:03:28 +0300 Subject: [PATCH 132/443] uapi: fix more linux/kfd_ioctl.h userspace compilation errors Consistently use types provided by <linux/types.h> via <drm/drm.h> to fix struct kfd_ioctl_get_queue_wave_state_args userspace compilation errors. Fixes: 5df099e8bc83f ("drm/amdkfd: Add wavefront context save state retrieval ioctl") Signed-off-by: Dmitry V. Levin <ldv@altlinux.org> Reviewed-by: Felix Kuehling <Felix.Kuehling@amd.com> Signed-off-by: Alex Deucher <alexander.deucher@amd.com> --- include/uapi/linux/kfd_ioctl.h | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/include/uapi/linux/kfd_ioctl.h b/include/uapi/linux/kfd_ioctl.h index dae897f38e593..b01eb502d49c5 100644 --- a/include/uapi/linux/kfd_ioctl.h +++ b/include/uapi/linux/kfd_ioctl.h @@ -83,11 +83,11 @@ struct kfd_ioctl_set_cu_mask_args { }; struct kfd_ioctl_get_queue_wave_state_args { - uint64_t ctl_stack_address; /* to KFD */ - uint32_t ctl_stack_used_size; /* from KFD */ - uint32_t save_area_used_size; /* from KFD */ - uint32_t queue_id; /* to KFD */ - uint32_t pad; + __u64 ctl_stack_address; /* to KFD */ + __u32 ctl_stack_used_size; /* from KFD */ + __u32 save_area_used_size; /* from KFD */ + __u32 queue_id; /* to KFD */ + __u32 pad; }; /* For kfd_ioctl_set_memory_policy_args.default_policy and alternate_policy */ From 6915ed86cca6a0b422e8ada05ec7009d2074b88a Mon Sep 17 00:00:00 2001 From: Jeff Barnhill <0xeffeff@gmail.com> Date: Mon, 5 Nov 2018 20:36:45 +0000 Subject: [PATCH 133/443] net/ipv6: Move anycast init/cleanup functions out of CONFIG_PROC_FS Move the anycast.c init and cleanup functions which were inadvertently added inside the CONFIG_PROC_FS definition. Fixes: 2384d02520ff ("net/ipv6: Add anycast addresses to a global hashtable") Signed-off-by: Jeff Barnhill <0xeffeff@gmail.com> Signed-off-by: David S. Miller <davem@davemloft.net> --- net/ipv6/anycast.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/ipv6/anycast.c b/net/ipv6/anycast.c index 7698637cf8276..94999058e1102 100644 --- a/net/ipv6/anycast.c +++ b/net/ipv6/anycast.c @@ -590,6 +590,7 @@ void ac6_proc_exit(struct net *net) { remove_proc_entry("anycast6", net->proc_net); } +#endif /* Init / cleanup code */ @@ -611,4 +612,3 @@ void ipv6_anycast_cleanup(void) WARN_ON(!hlist_empty(&inet6_acaddr_lst[i])); spin_unlock(&acaddr_hash_lock); } -#endif From 0773495b1f5f1c5e23551843f87b5ff37e7af8f7 Mon Sep 17 00:00:00 2001 From: Max Filippov <jcmvbkbc@gmail.com> Date: Sun, 4 Nov 2018 01:46:00 -0700 Subject: [PATCH 134/443] xtensa: make sure bFLT stack is 16 byte aligned Xtensa ABI requires stack alignment to be at least 16. In noMMU configuration ARCH_SLAB_MINALIGN is used to align stack. Make it at least 16. This fixes the following runtime error in noMMU configuration, caused by interaction between insufficiently aligned stack and alloca function, that results in corruption of on-stack variable in the libc function glob: Caught unhandled exception in 'sh' (pid = 47, pc = 0x02d05d65) - should not happen EXCCAUSE is 15 Cc: stable@vger.kernel.org Signed-off-by: Max Filippov <jcmvbkbc@gmail.com> --- arch/xtensa/include/asm/processor.h | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/arch/xtensa/include/asm/processor.h b/arch/xtensa/include/asm/processor.h index e4ccb88b79963..677bc76c1d707 100644 --- a/arch/xtensa/include/asm/processor.h +++ b/arch/xtensa/include/asm/processor.h @@ -23,7 +23,11 @@ # error Linux requires the Xtensa Windowed Registers Option. #endif -#define ARCH_SLAB_MINALIGN XCHAL_DATA_WIDTH +/* Xtensa ABI requires stack alignment to be at least 16 */ + +#define STACK_ALIGN (XCHAL_DATA_WIDTH > 16 ? XCHAL_DATA_WIDTH : 16) + +#define ARCH_SLAB_MINALIGN STACK_ALIGN /* * User space process size: 1 GB. From 91d7b67000c6e9bd605624079fee5a084238ad92 Mon Sep 17 00:00:00 2001 From: Christophe JAILLET <christophe.jaillet@wanadoo.fr> Date: Tue, 16 Oct 2018 09:13:46 +0200 Subject: [PATCH 135/443] mtd: spi-nor: cadence-quadspi: Return error code in cqspi_direct_read_execute() We return 0 unconditionally in 'cqspi_direct_read_execute()'. However, 'ret' is set to some error codes in several error handling paths. Return 'ret' instead to propagate the error code. Fixes: ffa639e069fb ("mtd: spi-nor: cadence-quadspi: Add DMA support for direct mode reads") Cc: <stable@vger.kernel.org> Signed-off-by: Christophe JAILLET <christophe.jaillet@wanadoo.fr> Signed-off-by: Boris Brezillon <boris.brezillon@bootlin.com> --- drivers/mtd/spi-nor/cadence-quadspi.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/mtd/spi-nor/cadence-quadspi.c b/drivers/mtd/spi-nor/cadence-quadspi.c index e24db817154ee..d846428ef038e 100644 --- a/drivers/mtd/spi-nor/cadence-quadspi.c +++ b/drivers/mtd/spi-nor/cadence-quadspi.c @@ -996,7 +996,7 @@ static int cqspi_direct_read_execute(struct spi_nor *nor, u_char *buf, err_unmap: dma_unmap_single(nor->dev, dma_dst, len, DMA_FROM_DEVICE); - return 0; + return ret; } static ssize_t cqspi_read(struct spi_nor *nor, loff_t from, From 90c31cb9a8115a1183daed9a714eea4be0687931 Mon Sep 17 00:00:00 2001 From: Boris Brezillon <boris.brezillon@bootlin.com> Date: Fri, 19 Oct 2018 11:02:22 +0200 Subject: [PATCH 136/443] mtd: spi-nor: Reset nor->addr_width when SFDP parsing failed Commit 5390a8df769e ("mtd: spi-nor: add support to non-uniform SFDP SPI NOR flash memories") removed the 'nor->addr_width = 0;' statement when spi_nor_parse_sfdp() returns an error, thus leaving ->addr_width in an undefined state which can cause trouble when spi_nor_scan() checks its value. Reported-by: Cyrille Pitchen <cyrille.pitchen@wedev4u.fr> Fixes: 5390a8df769e ("mtd: spi-nor: add support to non-uniform SFDP SPI NOR flash memories") Signed-off-by: Boris Brezillon <boris.brezillon@bootlin.com> Reviewed-by: Tudor Ambarus <tudor.ambarus@microchip.com> --- drivers/mtd/spi-nor/spi-nor.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/mtd/spi-nor/spi-nor.c b/drivers/mtd/spi-nor/spi-nor.c index 9407ca5f94433..3e54e31889c7b 100644 --- a/drivers/mtd/spi-nor/spi-nor.c +++ b/drivers/mtd/spi-nor/spi-nor.c @@ -3250,12 +3250,14 @@ static int spi_nor_init_params(struct spi_nor *nor, memcpy(&sfdp_params, params, sizeof(sfdp_params)); memcpy(&prev_map, &nor->erase_map, sizeof(prev_map)); - if (spi_nor_parse_sfdp(nor, &sfdp_params)) + if (spi_nor_parse_sfdp(nor, &sfdp_params)) { + nor->addr_width = 0; /* restore previous erase map */ memcpy(&nor->erase_map, &prev_map, sizeof(nor->erase_map)); - else + } else { memcpy(params, &sfdp_params, sizeof(*params)); + } } return 0; From 5e1acb4afacc6229946c3d2b7ffc422b53fb2448 Mon Sep 17 00:00:00 2001 From: Alexey Kodanev <alexey.kodanev@oracle.com> Date: Fri, 2 Nov 2018 19:11:04 +0300 Subject: [PATCH 137/443] rtnetlink: restore handling of dumpit return value in rtnl_dump_all() For non-zero return from dumpit() we should break the loop in rtnl_dump_all() and return the result. Otherwise, e.g., we could get the memory leak in inet6_dump_fib() [1]. The pointer to the allocated struct fib6_walker there (saved in cb->args) can be lost, reset on the next iteration. Fix it by partially restoring the previous behavior before commit c63586dc9b3e ("net: rtnl_dump_all needs to propagate error from dumpit function"). The returned error from dumpit() is still passed further. [1]: unreferenced object 0xffff88001322a200 (size 96): comm "sshd", pid 1484, jiffies 4296032768 (age 1432.542s) hex dump (first 32 bytes): 00 01 00 00 00 00 ad de 00 02 00 00 00 00 ad de ................ 18 09 41 36 00 88 ff ff 18 09 41 36 00 88 ff ff ..A6......A6.... backtrace: [<0000000095846b39>] kmem_cache_alloc_trace+0x151/0x220 [<000000007d12709f>] inet6_dump_fib+0x68d/0x940 [<000000002775a316>] rtnl_dump_all+0x1d9/0x2d0 [<00000000d7cd302b>] netlink_dump+0x945/0x11a0 [<000000002f43485f>] __netlink_dump_start+0x55d/0x800 [<00000000f76bbeec>] rtnetlink_rcv_msg+0x4fa/0xa00 [<000000009b5761f3>] netlink_rcv_skb+0x29c/0x420 [<0000000087a1dae1>] rtnetlink_rcv+0x15/0x20 [<00000000691b703b>] netlink_unicast+0x4e3/0x6c0 [<00000000b5be0204>] netlink_sendmsg+0x7f2/0xba0 [<0000000096d2aa60>] sock_sendmsg+0xba/0xf0 [<000000008c1b786f>] __sys_sendto+0x1e4/0x330 [<0000000019587b3f>] __x64_sys_sendto+0xe1/0x1a0 [<00000000071f4d56>] do_syscall_64+0x9f/0x300 [<000000002737577f>] entry_SYSCALL_64_after_hwframe+0x44/0xa9 [<0000000057587684>] 0xffffffffffffffff Fixes: c63586dc9b3e ("net: rtnl_dump_all needs to propagate error from dumpit function") Signed-off-by: Alexey Kodanev <alexey.kodanev@oracle.com> Signed-off-by: David S. Miller <davem@davemloft.net> --- net/core/rtnetlink.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index e01274bd5e3e2..33d9227a8b807 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -3367,7 +3367,7 @@ static int rtnl_dump_all(struct sk_buff *skb, struct netlink_callback *cb) cb->seq = 0; } ret = dumpit(skb, cb); - if (ret < 0) + if (ret) break; } cb->family = idx; From e22d0bfa09a56e427a1a950ccb85621c86b343a4 Mon Sep 17 00:00:00 2001 From: Alexey Kodanev <alexey.kodanev@oracle.com> Date: Fri, 2 Nov 2018 19:11:05 +0300 Subject: [PATCH 138/443] ipv6: properly check return value in inet6_dump_all() Make sure we call fib6_dump_end() if it happens that skb->len is zero. rtnl_dump_all() can reset cb->args on the next loop iteration there. Fixes: 08e814c9e8eb ("net/ipv6: Bail early if user only wants cloned entries") Fixes: ae677bbb4441 ("net: Don't return invalid table id error when dumping all families") Signed-off-by: Alexey Kodanev <alexey.kodanev@oracle.com> Signed-off-by: David S. Miller <davem@davemloft.net> --- net/ipv6/ip6_fib.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c index 1b8bc008b53b6..ae3786132c236 100644 --- a/net/ipv6/ip6_fib.c +++ b/net/ipv6/ip6_fib.c @@ -591,7 +591,7 @@ static int inet6_dump_fib(struct sk_buff *skb, struct netlink_callback *cb) /* fib entries are never clones */ if (arg.filter.flags & RTM_F_CLONED) - return skb->len; + goto out; w = (void *)cb->args[2]; if (!w) { @@ -621,7 +621,7 @@ static int inet6_dump_fib(struct sk_buff *skb, struct netlink_callback *cb) tb = fib6_get_table(net, arg.filter.table_id); if (!tb) { if (arg.filter.dump_all_families) - return skb->len; + goto out; NL_SET_ERR_MSG_MOD(cb->extack, "FIB table does not exist"); return -ENOENT; From d016b4a3562b745fd9fa387a47d8de62ccd7e241 Mon Sep 17 00:00:00 2001 From: "Matwey V. Kornilov" <matwey@sai.msu.ru> Date: Fri, 2 Nov 2018 21:19:36 +0300 Subject: [PATCH 139/443] net: core: netpoll: Enable netconsole IPv6 link local address There is no reason to discard using source link local address when remote netconsole IPv6 address is set to be link local one. The patch allows administrators to use IPv6 netconsole without explicitly configuring source address: netconsole=@/,@fe80::5054:ff:fe2f:6012/ Signed-off-by: Matwey V. Kornilov <matwey@sai.msu.ru> Signed-off-by: David S. Miller <davem@davemloft.net> --- net/core/netpoll.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/core/netpoll.c b/net/core/netpoll.c index 5da9552b186bc..2b9fdbc43205f 100644 --- a/net/core/netpoll.c +++ b/net/core/netpoll.c @@ -717,7 +717,8 @@ int netpoll_setup(struct netpoll *np) read_lock_bh(&idev->lock); list_for_each_entry(ifp, &idev->addr_list, if_list) { - if (ipv6_addr_type(&ifp->addr) & IPV6_ADDR_LINKLOCAL) + if (!!(ipv6_addr_type(&ifp->addr) & IPV6_ADDR_LINKLOCAL) != + !!(ipv6_addr_type(&np->remote_ip.in6) & IPV6_ADDR_LINKLOCAL)) continue; np->local_ip.in6 = ifp->addr; err = 0; From c34c1287778b080ed692c0a46a8e345206cc29e6 Mon Sep 17 00:00:00 2001 From: Andrei Vagin <avagin@gmail.com> Date: Sun, 4 Nov 2018 22:37:15 -0800 Subject: [PATCH 140/443] sock_diag: fix autoloading of the raw_diag module IPPROTO_RAW isn't registred as an inet protocol, so inet_protos[protocol] is always NULL for it. Cc: Cyrill Gorcunov <gorcunov@gmail.com> Cc: Xin Long <lucien.xin@gmail.com> Fixes: bf2ae2e4bf93 ("sock_diag: request _diag module only when the family or proto has been registered") Signed-off-by: Andrei Vagin <avagin@gmail.com> Reviewed-by: Cyrill Gorcunov <gorcunov@gmail.com> Signed-off-by: David S. Miller <davem@davemloft.net> --- net/core/sock.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/core/sock.c b/net/core/sock.c index 6fcc4bc07d19b..080a880a1761b 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -3279,6 +3279,7 @@ int sock_load_diag_module(int family, int protocol) #ifdef CONFIG_INET if (family == AF_INET && + protocol != IPPROTO_RAW && !rcu_access_pointer(inet_protos[protocol])) return -ENOENT; #endif From 97adaddaa6db7a8af81b9b11e30cbe3628cd6700 Mon Sep 17 00:00:00 2001 From: Taehee Yoo <ap420073@gmail.com> Date: Mon, 5 Nov 2018 22:31:41 +0900 Subject: [PATCH 141/443] net: bpfilter: fix iptables failure if bpfilter_umh is disabled When iptables command is executed, ip_{set/get}sockopt() try to upload bpfilter.ko if bpfilter is enabled. if it couldn't find bpfilter.ko, command is failed. bpfilter.ko is generated if CONFIG_BPFILTER_UMH is enabled. ip_{set/get}sockopt() only checks CONFIG_BPFILTER. So that if CONFIG_BPFILTER is enabled and CONFIG_BPFILTER_UMH is disabled, iptables command is always failed. test config: CONFIG_BPFILTER=y # CONFIG_BPFILTER_UMH is not set test command: %iptables -L iptables: No chain/target/match by that name. Fixes: d2ba09c17a06 ("net: add skeleton of bpfilter kernel module") Signed-off-by: Taehee Yoo <ap420073@gmail.com> Signed-off-by: David S. Miller <davem@davemloft.net> --- net/ipv4/ip_sockglue.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c index 26c36cccabdc2..fffcc130900e5 100644 --- a/net/ipv4/ip_sockglue.c +++ b/net/ipv4/ip_sockglue.c @@ -1246,7 +1246,7 @@ int ip_setsockopt(struct sock *sk, int level, return -ENOPROTOOPT; err = do_ip_setsockopt(sk, level, optname, optval, optlen); -#ifdef CONFIG_BPFILTER +#if IS_ENABLED(CONFIG_BPFILTER_UMH) if (optname >= BPFILTER_IPT_SO_SET_REPLACE && optname < BPFILTER_IPT_SET_MAX) err = bpfilter_ip_set_sockopt(sk, optname, optval, optlen); @@ -1559,7 +1559,7 @@ int ip_getsockopt(struct sock *sk, int level, int err; err = do_ip_getsockopt(sk, level, optname, optval, optlen, 0); -#ifdef CONFIG_BPFILTER +#if IS_ENABLED(CONFIG_BPFILTER_UMH) if (optname >= BPFILTER_IPT_SO_GET_INFO && optname < BPFILTER_IPT_GET_MAX) err = bpfilter_ip_get_sockopt(sk, optname, optval, optlen); @@ -1596,7 +1596,7 @@ int compat_ip_getsockopt(struct sock *sk, int level, int optname, err = do_ip_getsockopt(sk, level, optname, optval, optlen, MSG_CMSG_COMPAT); -#ifdef CONFIG_BPFILTER +#if IS_ENABLED(CONFIG_BPFILTER_UMH) if (optname >= BPFILTER_IPT_SO_GET_INFO && optname < BPFILTER_IPT_GET_MAX) err = bpfilter_ip_get_sockopt(sk, optname, optval, optlen); From 7131193157414ac3167d7b2f2feb4c42b415d6c5 Mon Sep 17 00:00:00 2001 From: Rasmus Villemoes <linux@rasmusvillemoes.dk> Date: Mon, 5 Nov 2018 18:52:21 +0100 Subject: [PATCH 142/443] net: alx: make alx_drv_name static alx_drv_name is not used outside main.c, so there's no reason for it to have external linkage. Signed-off-by: Rasmus Villemoes <linux@rasmusvillemoes.dk> Signed-off-by: David S. Miller <davem@davemloft.net> --- drivers/net/ethernet/atheros/alx/alx.h | 1 - drivers/net/ethernet/atheros/alx/main.c | 2 +- 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/net/ethernet/atheros/alx/alx.h b/drivers/net/ethernet/atheros/alx/alx.h index 78c5de467426f..9d0e74f6b089d 100644 --- a/drivers/net/ethernet/atheros/alx/alx.h +++ b/drivers/net/ethernet/atheros/alx/alx.h @@ -140,6 +140,5 @@ struct alx_priv { }; extern const struct ethtool_ops alx_ethtool_ops; -extern const char alx_drv_name[]; #endif diff --git a/drivers/net/ethernet/atheros/alx/main.c b/drivers/net/ethernet/atheros/alx/main.c index 7968c644ad861..c131cfc1b79df 100644 --- a/drivers/net/ethernet/atheros/alx/main.c +++ b/drivers/net/ethernet/atheros/alx/main.c @@ -49,7 +49,7 @@ #include "hw.h" #include "reg.h" -const char alx_drv_name[] = "alx"; +static const char alx_drv_name[] = "alx"; static void alx_free_txbuf(struct alx_tx_queue *txq, int entry) { From 5841734fa6f997e57e8c29cbb6409bd74a6949e8 Mon Sep 17 00:00:00 2001 From: Bart Van Assche <bvanassche@acm.org> Date: Mon, 5 Nov 2018 16:44:59 -0800 Subject: [PATCH 143/443] scsi: target/core: Avoid that a kernel oops is triggered when COMPARE AND WRITE fails Fixes: aa73237dcb2d ("scsi: target/core: Always call transport_complete_callback() upon failure") Reviewed-by: David Disseldorp <ddiss@suse.de> Cc: Nicholas Bellinger <nab@linux-iscsi.org> Cc: Mike Christie <mchristi@redhat.com> Cc: Christoph Hellwig <hch@lst.de> Cc: Hannes Reinecke <hare@suse.de> Signed-off-by: Bart Van Assche <bvanassche@acm.org> Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com> --- drivers/target/target_core_transport.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/target/target_core_transport.c b/drivers/target/target_core_transport.c index e31e4fc31aa15..2cfd61d62e973 100644 --- a/drivers/target/target_core_transport.c +++ b/drivers/target/target_core_transport.c @@ -1778,7 +1778,7 @@ EXPORT_SYMBOL(target_submit_tmr); void transport_generic_request_failure(struct se_cmd *cmd, sense_reason_t sense_reason) { - int ret = 0; + int ret = 0, post_ret; pr_debug("-----[ Storage Engine Exception; sense_reason %d\n", sense_reason); @@ -1790,7 +1790,7 @@ void transport_generic_request_failure(struct se_cmd *cmd, transport_complete_task_attr(cmd); if (cmd->transport_complete_callback) - cmd->transport_complete_callback(cmd, false, NULL); + cmd->transport_complete_callback(cmd, false, &post_ret); if (transport_check_aborted_status(cmd, 1)) return; From f8f4adc1c1661686f492cf27817844a3d0517aff Mon Sep 17 00:00:00 2001 From: Arnd Bergmann <arnd@arndb.de> Date: Fri, 2 Nov 2018 16:34:49 +0100 Subject: [PATCH 144/443] scsi: myrb: fix sprintf buffer overflow warning gcc warns that the 12 byte fw_version field might not be long enough to contain the generated firmware name string: drivers/scsi/myrb.c: In function 'myrb_get_hba_config': drivers/scsi/myrb.c:1052:38: error: '%02d' directive writing between 2 and 3 bytes into a region of size between 2 and 5 [-Werror=format-overflow=] sprintf(cb->fw_version, "%d.%02d-%c-%02d", ^~~~ drivers/scsi/myrb.c:1052:26: note: directive argument in the range [0, 255] sprintf(cb->fw_version, "%d.%02d-%c-%02d", ^~~~~~~~~~~~~~~~~ drivers/scsi/myrb.c:1052:2: note: 'sprintf' output between 10 and 14 bytes into a destination of size 12 sprintf(cb->fw_version, "%d.%02d-%c-%02d", ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ enquiry2->fw.major_version, ~~~~~~~~~~~~~~~~~~~~~~~~~~~ enquiry2->fw.minor_version, ~~~~~~~~~~~~~~~~~~~~~~~~~~~ enquiry2->fw.firmware_type, ~~~~~~~~~~~~~~~~~~~~~~~~~~~ enquiry2->fw.turn_id); ~~~~~~~~~~~~~~~~~~~~~ I have not checked whether there are appropriate range checks before the sprintf, but there is a range check after it that will bail out in case of out of range version numbers. This means we can simply use snprintf() instead of sprintf() to limit the output buffer size, and it will work correctly. Fixes: 081ff398c56c ("scsi: myrb: Add Mylex RAID controller (block interface)") Signed-off-by: Arnd Bergmann <arnd@arndb.de> Reviewed-by: Hannes Reinecke <hare@suse.com> Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com> --- drivers/scsi/myrb.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/scsi/myrb.c b/drivers/scsi/myrb.c index aeb282f617c5c..0642f2d0a3bb6 100644 --- a/drivers/scsi/myrb.c +++ b/drivers/scsi/myrb.c @@ -1049,7 +1049,8 @@ static int myrb_get_hba_config(struct myrb_hba *cb) enquiry2->fw.firmware_type = '0'; enquiry2->fw.turn_id = 0; } - sprintf(cb->fw_version, "%d.%02d-%c-%02d", + snprintf(cb->fw_version, sizeof(cb->fw_version), + "%d.%02d-%c-%02d", enquiry2->fw.major_version, enquiry2->fw.minor_version, enquiry2->fw.firmware_type, From f8d294324598ec85bea2779512e48c94cbe4d7c6 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann <arnd@arndb.de> Date: Fri, 2 Nov 2018 16:35:48 +0100 Subject: [PATCH 145/443] scsi: lpfc: fix remoteport access The addition of a spinlock in lpfc_debugfs_nodelist_data() introduced a bug that lets us not skip NULL pointers correctly, as noticed by gcc-8: drivers/scsi/lpfc/lpfc_debugfs.c: In function 'lpfc_debugfs_nodelist_data.constprop': drivers/scsi/lpfc/lpfc_debugfs.c:728:13: error: 'nrport' may be used uninitialized in this function [-Werror=maybe-uninitialized] if (nrport->port_role & FC_PORT_ROLE_NVME_INITIATOR) This changes the logic back to what it was, while keeping the added spinlock. Fixes: 9e210178267b ("scsi: lpfc: Synchronize access to remoteport via rport") Signed-off-by: Arnd Bergmann <arnd@arndb.de> Reviewed-by: Johannes Thumshirn <jthumshirn@suse.de> Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com> --- drivers/scsi/lpfc/lpfc_debugfs.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/scsi/lpfc/lpfc_debugfs.c b/drivers/scsi/lpfc/lpfc_debugfs.c index 0c8005bb0f53f..34d311a7dbef1 100644 --- a/drivers/scsi/lpfc/lpfc_debugfs.c +++ b/drivers/scsi/lpfc/lpfc_debugfs.c @@ -698,6 +698,8 @@ lpfc_debugfs_nodelist_data(struct lpfc_vport *vport, char *buf, int size) rport = lpfc_ndlp_get_nrport(ndlp); if (rport) nrport = rport->remoteport; + else + nrport = NULL; spin_unlock(&phba->hbalock); if (!nrport) continue; From 77409c4cdc44560e1b3b839e62d7f73478199680 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann <arnd@arndb.de> Date: Fri, 2 Nov 2018 16:44:13 +0100 Subject: [PATCH 146/443] scsi: myrs: avoid stack overflow warning Putting a 1024 byte data structure on the stack is generally a bad idea. On 32-bit systems, it also triggers a compile-time warning when building with -Og: drivers/scsi/myrs.c: In function 'myrs_get_ctlr_info': drivers/scsi/myrs.c:212:1: error: the frame size of 1028 bytes is larger than 1024 bytes [-Werror=frame-larger-than=] We only really need three members of the structure, so just read them manually here instead of copying the entire structure. Fixes: 77266186397c ("scsi: myrs: Add Mylex RAID controller (SCSI interface)") Signed-off-by: Arnd Bergmann <arnd@arndb.de> Reviewed-by: Hannes Reinecke <hare@suse.com> Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com> --- drivers/scsi/myrs.c | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/drivers/scsi/myrs.c b/drivers/scsi/myrs.c index 0264a2e2bc190..b8d54ef8cf6df 100644 --- a/drivers/scsi/myrs.c +++ b/drivers/scsi/myrs.c @@ -163,9 +163,12 @@ static unsigned char myrs_get_ctlr_info(struct myrs_hba *cs) dma_addr_t ctlr_info_addr; union myrs_sgl *sgl; unsigned char status; - struct myrs_ctlr_info old; + unsigned short ldev_present, ldev_critical, ldev_offline; + + ldev_present = cs->ctlr_info->ldev_present; + ldev_critical = cs->ctlr_info->ldev_critical; + ldev_offline = cs->ctlr_info->ldev_offline; - memcpy(&old, cs->ctlr_info, sizeof(struct myrs_ctlr_info)); ctlr_info_addr = dma_map_single(&cs->pdev->dev, cs->ctlr_info, sizeof(struct myrs_ctlr_info), DMA_FROM_DEVICE); @@ -198,9 +201,9 @@ static unsigned char myrs_get_ctlr_info(struct myrs_hba *cs) cs->ctlr_info->rbld_active + cs->ctlr_info->exp_active != 0) cs->needs_update = true; - if (cs->ctlr_info->ldev_present != old.ldev_present || - cs->ctlr_info->ldev_critical != old.ldev_critical || - cs->ctlr_info->ldev_offline != old.ldev_offline) + if (cs->ctlr_info->ldev_present != ldev_present || + cs->ctlr_info->ldev_critical != ldev_critical || + cs->ctlr_info->ldev_offline != ldev_offline) shost_printk(KERN_INFO, cs->host, "Logical drive count changes (%d/%d/%d)\n", cs->ctlr_info->ldev_critical, From a3ecf48248a393438e77e569a0047e968e0ec6c6 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann <arnd@arndb.de> Date: Fri, 2 Nov 2018 16:47:23 +0100 Subject: [PATCH 147/443] scsi: myrs: only build on little-endian platforms Reading throught the new driver, I noticed that this cannot work on big-endian CPUs, and the old DAC960 had exactly the same behavior. To document this for the future, add a Kconfig dependency that prevents it from being included in big-endian kernels. Since the hardware is really old and we never had a working driver on it for big-endian platforms, it's unlikely to make a difference to users. Signed-off-by: Arnd Bergmann <arnd@arndb.de> Reviewed-by: Hannes Reinecke <hare@suse.com> Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com> --- drivers/scsi/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/scsi/Kconfig b/drivers/scsi/Kconfig index f07444d30b216..640cd1b31a18d 100644 --- a/drivers/scsi/Kconfig +++ b/drivers/scsi/Kconfig @@ -578,6 +578,7 @@ config SCSI_MYRB config SCSI_MYRS tristate "Mylex DAC960/DAC1100 PCI RAID Controller (SCSI Interface)" depends on PCI + depends on !CPU_BIG_ENDIAN || COMPILE_TEST select RAID_ATTRS help This driver adds support for the Mylex DAC960, AcceleRAID, and From e34ff8edcae89922d187425ab0b82e6a039aa371 Mon Sep 17 00:00:00 2001 From: YueHaibing <yuehaibing@huawei.com> Date: Fri, 26 Oct 2018 15:07:31 +0800 Subject: [PATCH 148/443] scsi: hisi_sas: Remove set but not used variable 'dq_list' Fixes gcc '-Wunused-but-set-variable' warning: drivers/scsi/hisi_sas/hisi_sas_v1_hw.c: In function 'start_delivery_v1_hw': drivers/scsi/hisi_sas/hisi_sas_v1_hw.c:907:20: warning: variable 'dq_list' set but not used [-Wunused-but-set-variable] drivers/scsi/hisi_sas/hisi_sas_v2_hw.c: In function 'start_delivery_v2_hw': drivers/scsi/hisi_sas/hisi_sas_v2_hw.c:1671:20: warning: variable 'dq_list' set but not used [-Wunused-but-set-variable] drivers/scsi/hisi_sas/hisi_sas_v3_hw.c: In function 'start_delivery_v3_hw': drivers/scsi/hisi_sas/hisi_sas_v3_hw.c:889:20: warning: variable 'dq_list' set but not used [-Wunused-but-set-variable] It never used since introduction in commit fa222db0b036 ("scsi: hisi_sas: Don't lock DQ for complete task sending") Signed-off-by: YueHaibing <yuehaibing@huawei.com> Acked-by: John Garry <john.garry@huawei.com> Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com> --- drivers/scsi/hisi_sas/hisi_sas_v1_hw.c | 2 -- drivers/scsi/hisi_sas/hisi_sas_v2_hw.c | 2 -- drivers/scsi/hisi_sas/hisi_sas_v3_hw.c | 2 -- 3 files changed, 6 deletions(-) diff --git a/drivers/scsi/hisi_sas/hisi_sas_v1_hw.c b/drivers/scsi/hisi_sas/hisi_sas_v1_hw.c index f0e457e6884e5..8df822a4a1bd6 100644 --- a/drivers/scsi/hisi_sas/hisi_sas_v1_hw.c +++ b/drivers/scsi/hisi_sas/hisi_sas_v1_hw.c @@ -904,11 +904,9 @@ static void start_delivery_v1_hw(struct hisi_sas_dq *dq) { struct hisi_hba *hisi_hba = dq->hisi_hba; struct hisi_sas_slot *s, *s1, *s2 = NULL; - struct list_head *dq_list; int dlvry_queue = dq->id; int wp; - dq_list = &dq->list; list_for_each_entry_safe(s, s1, &dq->list, delivery) { if (!s->ready) break; diff --git a/drivers/scsi/hisi_sas/hisi_sas_v2_hw.c b/drivers/scsi/hisi_sas/hisi_sas_v2_hw.c index cc36b6473e986..77a85ead483e0 100644 --- a/drivers/scsi/hisi_sas/hisi_sas_v2_hw.c +++ b/drivers/scsi/hisi_sas/hisi_sas_v2_hw.c @@ -1670,11 +1670,9 @@ static void start_delivery_v2_hw(struct hisi_sas_dq *dq) { struct hisi_hba *hisi_hba = dq->hisi_hba; struct hisi_sas_slot *s, *s1, *s2 = NULL; - struct list_head *dq_list; int dlvry_queue = dq->id; int wp; - dq_list = &dq->list; list_for_each_entry_safe(s, s1, &dq->list, delivery) { if (!s->ready) break; diff --git a/drivers/scsi/hisi_sas/hisi_sas_v3_hw.c b/drivers/scsi/hisi_sas/hisi_sas_v3_hw.c index bd4ce38b98d22..a369450a1fa7b 100644 --- a/drivers/scsi/hisi_sas/hisi_sas_v3_hw.c +++ b/drivers/scsi/hisi_sas/hisi_sas_v3_hw.c @@ -886,11 +886,9 @@ static void start_delivery_v3_hw(struct hisi_sas_dq *dq) { struct hisi_hba *hisi_hba = dq->hisi_hba; struct hisi_sas_slot *s, *s1, *s2 = NULL; - struct list_head *dq_list; int dlvry_queue = dq->id; int wp; - dq_list = &dq->list; list_for_each_entry_safe(s, s1, &dq->list, delivery) { if (!s->ready) break; From 0d52e642c0ccd7a877a58b6ec23552eb35e7170c Mon Sep 17 00:00:00 2001 From: Masanari Iida <standby24x7@gmail.com> Date: Sun, 28 Oct 2018 14:05:48 +0900 Subject: [PATCH 149/443] scsi: qla2xxx: Fix a typo in MODULE_PARM_DESC This patch fixes a spelling typo in MODULE_PARM_DESC of ql2xplogiabsentdevice. Signed-off-by: Masanari Iida <standby24x7@gmail.com> Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com> --- drivers/scsi/qla2xxx/qla_os.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/scsi/qla2xxx/qla_os.c b/drivers/scsi/qla2xxx/qla_os.c index 518f15141170e..20c85eed1a750 100644 --- a/drivers/scsi/qla2xxx/qla_os.c +++ b/drivers/scsi/qla2xxx/qla_os.c @@ -67,7 +67,7 @@ module_param(ql2xplogiabsentdevice, int, S_IRUGO|S_IWUSR); MODULE_PARM_DESC(ql2xplogiabsentdevice, "Option to enable PLOGI to devices that are not present after " "a Fabric scan. This is needed for several broken switches. " - "Default is 0 - no PLOGI. 1 - perfom PLOGI."); + "Default is 0 - no PLOGI. 1 - perform PLOGI."); int ql2xloginretrycount = 0; module_param(ql2xloginretrycount, int, S_IRUGO); From 96edebd6bb995f2acb7694bed6e01bf6f5a7b634 Mon Sep 17 00:00:00 2001 From: Finn Thain <fthain@telegraphics.com.au> Date: Wed, 24 Oct 2018 18:45:33 +1100 Subject: [PATCH 150/443] scsi: NCR5380: Return false instead of NULL I overlooked this statement when I recently converted the function result type from struct scsi_cmnd * to bool. No change to object code. Signed-off-by: Finn Thain <fthain@telegraphics.com.au> Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com> --- drivers/scsi/NCR5380.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/scsi/NCR5380.c b/drivers/scsi/NCR5380.c index 8429c855701fc..01c23d27f290b 100644 --- a/drivers/scsi/NCR5380.c +++ b/drivers/scsi/NCR5380.c @@ -1198,7 +1198,7 @@ static bool NCR5380_select(struct Scsi_Host *instance, struct scsi_cmnd *cmd) out: if (!hostdata->selecting) - return NULL; + return false; hostdata->selecting = NULL; return ret; } From 0ae790683fc28bb718d74f87cdf753c6445fe28d Mon Sep 17 00:00:00 2001 From: Michael Ellerman <mpe@ellerman.id.au> Date: Tue, 6 Nov 2018 19:23:28 +1100 Subject: [PATCH 151/443] powerpc/mm/64s: Consolidate SLB assertions The code for assert_slb_exists() and assert_slb_notexists() is almost identical, except for the polarity of the WARN_ON(). In a future patch we'll need to modify this code, so consolidate it now into a single function. Signed-off-by: Michael Ellerman <mpe@ellerman.id.au> --- arch/powerpc/mm/slb.c | 29 +++++++++-------------------- 1 file changed, 9 insertions(+), 20 deletions(-) diff --git a/arch/powerpc/mm/slb.c b/arch/powerpc/mm/slb.c index c3fdf2969d9fa..f3e002ee457be 100644 --- a/arch/powerpc/mm/slb.c +++ b/arch/powerpc/mm/slb.c @@ -58,7 +58,7 @@ static inline unsigned long mk_vsid_data(unsigned long ea, int ssize, return __mk_vsid_data(get_kernel_vsid(ea, ssize), ssize, flags); } -static void assert_slb_exists(unsigned long ea) +static void assert_slb_presence(bool present, unsigned long ea) { #ifdef CONFIG_DEBUG_VM unsigned long tmp; @@ -66,19 +66,8 @@ static void assert_slb_exists(unsigned long ea) WARN_ON_ONCE(mfmsr() & MSR_EE); asm volatile("slbfee. %0, %1" : "=r"(tmp) : "r"(ea) : "cr0"); - WARN_ON(tmp == 0); -#endif -} -static void assert_slb_notexists(unsigned long ea) -{ -#ifdef CONFIG_DEBUG_VM - unsigned long tmp; - - WARN_ON_ONCE(mfmsr() & MSR_EE); - - asm volatile("slbfee. %0, %1" : "=r"(tmp) : "r"(ea) : "cr0"); - WARN_ON(tmp != 0); + WARN_ON(present == (tmp == 0)); #endif } @@ -114,7 +103,7 @@ static inline void create_shadowed_slbe(unsigned long ea, int ssize, */ slb_shadow_update(ea, ssize, flags, index); - assert_slb_notexists(ea); + assert_slb_presence(false, ea); asm volatile("slbmte %0,%1" : : "r" (mk_vsid_data(ea, ssize, flags)), "r" (mk_esid_data(ea, ssize, index)) @@ -137,7 +126,7 @@ void __slb_restore_bolted_realmode(void) "r" (be64_to_cpu(p->save_area[index].esid))); } - assert_slb_exists(local_paca->kstack); + assert_slb_presence(true, local_paca->kstack); } /* @@ -185,7 +174,7 @@ void slb_flush_and_restore_bolted(void) :: "r" (be64_to_cpu(p->save_area[KSTACK_INDEX].vsid)), "r" (be64_to_cpu(p->save_area[KSTACK_INDEX].esid)) : "memory"); - assert_slb_exists(get_paca()->kstack); + assert_slb_presence(true, get_paca()->kstack); get_paca()->slb_cache_ptr = 0; @@ -443,9 +432,9 @@ void switch_slb(struct task_struct *tsk, struct mm_struct *mm) ea = (unsigned long) get_paca()->slb_cache[i] << SID_SHIFT; /* - * Could assert_slb_exists here, but hypervisor - * or machine check could have come in and - * removed the entry at this point. + * Could assert_slb_presence(true) here, but + * hypervisor or machine check could have come + * in and removed the entry at this point. */ slbie_data = ea; @@ -676,7 +665,7 @@ static long slb_insert_entry(unsigned long ea, unsigned long context, * User preloads should add isync afterwards in case the kernel * accesses user memory before it returns to userspace with rfid. */ - assert_slb_notexists(ea); + assert_slb_presence(false, ea); asm volatile("slbmte %0, %1" : : "r" (vsid_data), "r" (esid_data)); barrier(); From 08e6a3434e2125e4b21d0d3f84678d427345bc0d Mon Sep 17 00:00:00 2001 From: Michael Ellerman <mpe@ellerman.id.au> Date: Tue, 6 Nov 2018 19:25:18 +1100 Subject: [PATCH 152/443] powerpc/mm/64s: Use PPC_SLBFEE macro Old toolchains don't know about slbfee and break the build, eg: {standard input}:37: Error: Unrecognized opcode: `slbfee.' Fix it by using the macro version. We need to add an underscore version that takes raw register numbers from the inline asm, rather than our Rx macros. Fixes: e15a4fea4dee ("powerpc/64s/hash: Add some SLB debugging tests") Signed-off-by: Michael Ellerman <mpe@ellerman.id.au> --- arch/powerpc/include/asm/ppc-opcode.h | 2 ++ arch/powerpc/mm/slb.c | 3 ++- 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/arch/powerpc/include/asm/ppc-opcode.h b/arch/powerpc/include/asm/ppc-opcode.h index 6093bc8f74e51..a6e9e314c7077 100644 --- a/arch/powerpc/include/asm/ppc-opcode.h +++ b/arch/powerpc/include/asm/ppc-opcode.h @@ -493,6 +493,8 @@ __PPC_RS(t) | __PPC_RA0(a) | __PPC_RB(b)) #define PPC_SLBFEE_DOT(t, b) stringify_in_c(.long PPC_INST_SLBFEE | \ __PPC_RT(t) | __PPC_RB(b)) +#define __PPC_SLBFEE_DOT(t, b) stringify_in_c(.long PPC_INST_SLBFEE | \ + ___PPC_RT(t) | ___PPC_RB(b)) #define PPC_ICBT(c,a,b) stringify_in_c(.long PPC_INST_ICBT | \ __PPC_CT(c) | __PPC_RA0(a) | __PPC_RB(b)) /* PASemi instructions */ diff --git a/arch/powerpc/mm/slb.c b/arch/powerpc/mm/slb.c index f3e002ee457be..457fd29448b1b 100644 --- a/arch/powerpc/mm/slb.c +++ b/arch/powerpc/mm/slb.c @@ -19,6 +19,7 @@ #include <asm/mmu.h> #include <asm/mmu_context.h> #include <asm/paca.h> +#include <asm/ppc-opcode.h> #include <asm/cputable.h> #include <asm/cacheflush.h> #include <asm/smp.h> @@ -65,7 +66,7 @@ static void assert_slb_presence(bool present, unsigned long ea) WARN_ON_ONCE(mfmsr() & MSR_EE); - asm volatile("slbfee. %0, %1" : "=r"(tmp) : "r"(ea) : "cr0"); + asm volatile(__PPC_SLBFEE_DOT(%0, %1) : "=r"(tmp) : "r"(ea) : "cr0"); WARN_ON(present == (tmp == 0)); #endif From 9586d569a369dc585a3e191dcabd72748e3c9c5c Mon Sep 17 00:00:00 2001 From: Michael Ellerman <mpe@ellerman.id.au> Date: Tue, 6 Nov 2018 19:25:38 +1100 Subject: [PATCH 153/443] powerpc/mm/64s: Only use slbfee on CPUs that support it The slbfee instruction was only added in ISA 2.05 (Power6), it's not supported on older CPUs. We don't have a CPU feature for that ISA version though, so just use the ISA 2.06 feature flag. Fixes: e15a4fea4dee ("powerpc/64s/hash: Add some SLB debugging tests") Signed-off-by: Michael Ellerman <mpe@ellerman.id.au> --- arch/powerpc/mm/slb.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/arch/powerpc/mm/slb.c b/arch/powerpc/mm/slb.c index 457fd29448b1b..b663a36f9ada1 100644 --- a/arch/powerpc/mm/slb.c +++ b/arch/powerpc/mm/slb.c @@ -66,6 +66,9 @@ static void assert_slb_presence(bool present, unsigned long ea) WARN_ON_ONCE(mfmsr() & MSR_EE); + if (!cpu_has_feature(CPU_FTR_ARCH_206)) + return; + asm volatile(__PPC_SLBFEE_DOT(%0, %1) : "=r"(tmp) : "r"(ea) : "cr0"); WARN_ON(present == (tmp == 0)); From d9cccfa7c4d1d9ef967ec9308df7304a18609b30 Mon Sep 17 00:00:00 2001 From: Liam Merwick <Liam.Merwick@oracle.com> Date: Fri, 2 Nov 2018 14:04:23 +0000 Subject: [PATCH 154/443] xen/grant-table: Fix incorrect gnttab_dma_free_pages() pr_debug message If a call to xenmem_reservation_increase() in gnttab_dma_free_pages() fails it triggers a message "Failed to decrease reservation..." which should be "Failed to increase reservation..." Fixes: 9bdc7304f536 ('xen/grant-table: Allow allocating buffers suitable for DMA') Reported-by: Ross Philipson <ross.philipson@oracle.com> Signed-off-by: Liam Merwick <liam.merwick@oracle.com> Reviewed-by: Mark Kanda <mark.kanda@oracle.com> Reviewed-by: Juergen Gross <jgross@suse.com> Signed-off-by: Juergen Gross <jgross@suse.com> --- drivers/xen/grant-table.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/xen/grant-table.c b/drivers/xen/grant-table.c index 84575baceebc8..97341fa75458e 100644 --- a/drivers/xen/grant-table.c +++ b/drivers/xen/grant-table.c @@ -914,7 +914,7 @@ int gnttab_dma_free_pages(struct gnttab_dma_alloc_args *args) ret = xenmem_reservation_increase(args->nr_pages, args->frames); if (ret != args->nr_pages) { - pr_debug("Failed to decrease reservation for DMA buffer\n"); + pr_debug("Failed to increase reservation for DMA buffer\n"); ret = -EFAULT; } else { ret = 0; From 6cc4a0863c9709c512280c64e698d68443ac8053 Mon Sep 17 00:00:00 2001 From: Manjunath Patil <manjunath.b.patil@oracle.com> Date: Tue, 30 Oct 2018 09:49:21 -0700 Subject: [PATCH 155/443] xen-blkfront: fix kernel panic with negotiate_mq error path MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit info->nr_rings isn't adjusted in case of ENOMEM error from negotiate_mq(). This leads to kernel panic in error path. Typical call stack involving panic - #8 page_fault at ffffffff8175936f [exception RIP: blkif_free_ring+33] RIP: ffffffffa0149491 RSP: ffff8804f7673c08 RFLAGS: 00010292 ... #9 blkif_free at ffffffffa0149aaa [xen_blkfront] #10 talk_to_blkback at ffffffffa014c8cd [xen_blkfront] #11 blkback_changed at ffffffffa014ea8b [xen_blkfront] #12 xenbus_otherend_changed at ffffffff81424670 #13 backend_changed at ffffffff81426dc3 #14 xenwatch_thread at ffffffff81422f29 #15 kthread at ffffffff810abe6a #16 ret_from_fork at ffffffff81754078 Cc: stable@vger.kernel.org Fixes: 7ed8ce1c5fc7 ("xen-blkfront: move negotiate_mq to cover all cases of new VBDs") Signed-off-by: Manjunath Patil <manjunath.b.patil@oracle.com> Acked-by: Roger Pau Monné <roger.pau@citrix.com> Signed-off-by: Juergen Gross <jgross@suse.com> --- drivers/block/xen-blkfront.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c index 9eea83ae01c6f..9ac3999a55b7a 100644 --- a/drivers/block/xen-blkfront.c +++ b/drivers/block/xen-blkfront.c @@ -1919,6 +1919,7 @@ static int negotiate_mq(struct blkfront_info *info) GFP_KERNEL); if (!info->rinfo) { xenbus_dev_fatal(info->xbdev, -ENOMEM, "allocating ring_info structure"); + info->nr_rings = 0; return -ENOMEM; } From ba26cd7d58dcc50e25c8f02b5376c05046b181dc Mon Sep 17 00:00:00 2001 From: Boris Brezillon <boris.brezillon@bootlin.com> Date: Mon, 5 Nov 2018 08:58:35 +0100 Subject: [PATCH 156/443] mtd: sa1100: avoid VLA in sa1100_setup_mtd Enabling -Wvla found another variable-length array with randconfig testing: drivers/mtd/maps/sa1100-flash.c: In function 'sa1100_setup_mtd': drivers/mtd/maps/sa1100-flash.c:224:10: error: ISO C90 forbids variable length array 'cdev' [-Werror=vla] Dynamically allocate the cdev array passed to mtd_concat_create() instead of using a VLA. Reported-by: Arnd Bergmann <arnd@arndb.de> Signed-off-by: Boris Brezillon <boris.brezillon@bootlin.com> Cc: Kees Cook <keescook@chromium.org> Cc: Olof Johansson <olof@lixom.net> --- drivers/mtd/maps/sa1100-flash.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/drivers/mtd/maps/sa1100-flash.c b/drivers/mtd/maps/sa1100-flash.c index 784c6e1a0391e..fd5fe12d74613 100644 --- a/drivers/mtd/maps/sa1100-flash.c +++ b/drivers/mtd/maps/sa1100-flash.c @@ -221,7 +221,14 @@ static struct sa_info *sa1100_setup_mtd(struct platform_device *pdev, info->mtd = info->subdev[0].mtd; ret = 0; } else if (info->num_subdev > 1) { - struct mtd_info *cdev[nr]; + struct mtd_info **cdev; + + cdev = kmalloc_array(nr, sizeof(*cdev), GFP_KERNEL); + if (!cdev) { + ret = -ENOMEM; + goto err; + } + /* * We detected multiple devices. Concatenate them together. */ @@ -230,6 +237,7 @@ static struct sa_info *sa1100_setup_mtd(struct platform_device *pdev, info->mtd = mtd_concat_create(cdev, info->num_subdev, plat->name); + kfree(cdev); if (info->mtd == NULL) { ret = -ENXIO; goto err; From 6282e916f774e37845c65d1eae9f8c649004f033 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel <ard.biesheuvel@linaro.org> Date: Mon, 5 Nov 2018 14:54:56 +0100 Subject: [PATCH 157/443] ARM: 8809/1: proc-v7: fix Thumb annotation of cpu_v7_hvc_switch_mm Due to what appears to be a copy/paste error, the opening ENTRY() of cpu_v7_hvc_switch_mm() lacks a matching ENDPROC(), and instead, the one for cpu_v7_smc_switch_mm() is duplicated. Given that it is ENDPROC() that emits the Thumb annotation, the cpu_v7_hvc_switch_mm() routine will be called in ARM mode on a Thumb2 kernel, resulting in the following splat: Internal error: Oops - undefined instruction: 0 [#1] SMP THUMB2 Modules linked in: CPU: 0 PID: 1 Comm: swapper/0 Not tainted 4.18.0-rc1-00030-g4d28ad89189d-dirty #488 Hardware name: QEMU KVM Virtual Machine, BIOS 0.0.0 02/06/2015 PC is at cpu_v7_hvc_switch_mm+0x12/0x18 LR is at flush_old_exec+0x31b/0x570 pc : [<c0316efe>] lr : [<c04117c7>] psr: 00000013 sp : ee899e50 ip : 00000000 fp : 00000001 r10: eda28f34 r9 : eda31800 r8 : c12470e0 r7 : eda1fc00 r6 : eda53000 r5 : 00000000 r4 : ee88c000 r3 : c0316eec r2 : 00000001 r1 : eda53000 r0 : 6da6c000 Flags: nzcv IRQs on FIQs on Mode SVC_32 ISA ARM Segment none Note the 'ISA ARM' in the last line. Fix this by using the correct name in ENDPROC(). Cc: <stable@vger.kernel.org> Fixes: 10115105cb3a ("ARM: spectre-v2: add firmware based hardening") Reviewed-by: Dave Martin <Dave.Martin@arm.com> Acked-by: Marc Zyngier <marc.zyngier@arm.com> Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org> Signed-off-by: Russell King <rmk+kernel@armlinux.org.uk> --- arch/arm/mm/proc-v7.S | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/mm/proc-v7.S b/arch/arm/mm/proc-v7.S index b78d59a1cc059..fccd4d99f505e 100644 --- a/arch/arm/mm/proc-v7.S +++ b/arch/arm/mm/proc-v7.S @@ -112,7 +112,7 @@ ENTRY(cpu_v7_hvc_switch_mm) hvc #0 ldmfd sp!, {r0 - r3} b cpu_v7_switch_mm -ENDPROC(cpu_v7_smc_switch_mm) +ENDPROC(cpu_v7_hvc_switch_mm) #endif ENTRY(cpu_v7_iciallu_switch_mm) mov r3, #0 From 86d4d068df573a8c2105554624796c086d6bec3d Mon Sep 17 00:00:00 2001 From: John David Anglin <dave.anglin@bell.net> Date: Tue, 6 Nov 2018 12:00:01 +0100 Subject: [PATCH 158/443] parisc: Revert "Release spinlocks using ordered store" This reverts commit d27dfa13b9f77ae7e6ed09d70a0426ed26c1a8f9. Unfortunately, this patch needs to be reverted. We need the full sync barrier and not the limited barrier provided by using an ordered store. The sync ensures that all accesses and cache purge instructions that follow the sync are performed after all such instructions prior the sync instruction have completed executing. The patch breaks the rwlock implementation in glibc. This caused the test-lock application in the libprelude testsuite to hang. With the change reverted, the test runs correctly and the libprelude package builds successfully. Signed-off-by: John David Anglin <dave.anglin@bell.net> Signed-off-by: Helge Deller <deller@gmx.de> --- arch/parisc/include/asm/spinlock.h | 4 ++-- arch/parisc/kernel/syscall.S | 12 ++++++++---- 2 files changed, 10 insertions(+), 6 deletions(-) diff --git a/arch/parisc/include/asm/spinlock.h b/arch/parisc/include/asm/spinlock.h index 16aec9ba2580a..8a63515f03bfe 100644 --- a/arch/parisc/include/asm/spinlock.h +++ b/arch/parisc/include/asm/spinlock.h @@ -37,8 +37,8 @@ static inline void arch_spin_unlock(arch_spinlock_t *x) volatile unsigned int *a; a = __ldcw_align(x); - /* Release with ordered store. */ - __asm__ __volatile__("stw,ma %0,0(%1)" : : "r"(1), "r"(a) : "memory"); + mb(); + *a = 1; } static inline int arch_spin_trylock(arch_spinlock_t *x) diff --git a/arch/parisc/kernel/syscall.S b/arch/parisc/kernel/syscall.S index 9505c317818df..a9bc90dc4ae75 100644 --- a/arch/parisc/kernel/syscall.S +++ b/arch/parisc/kernel/syscall.S @@ -640,7 +640,8 @@ cas_action: sub,<> %r28, %r25, %r0 2: stw %r24, 0(%r26) /* Free lock */ - stw,ma %r20, 0(%sr2,%r20) + sync + stw %r20, 0(%sr2,%r20) #if ENABLE_LWS_DEBUG /* Clear thread register indicator */ stw %r0, 4(%sr2,%r20) @@ -654,7 +655,8 @@ cas_action: 3: /* Error occurred on load or store */ /* Free lock */ - stw,ma %r20, 0(%sr2,%r20) + sync + stw %r20, 0(%sr2,%r20) #if ENABLE_LWS_DEBUG stw %r0, 4(%sr2,%r20) #endif @@ -855,7 +857,8 @@ cas2_action: cas2_end: /* Free lock */ - stw,ma %r20, 0(%sr2,%r20) + sync + stw %r20, 0(%sr2,%r20) /* Enable interrupts */ ssm PSW_SM_I, %r0 /* Return to userspace, set no error */ @@ -865,7 +868,8 @@ cas2_end: 22: /* Error occurred on load or store */ /* Free lock */ - stw,ma %r20, 0(%sr2,%r20) + sync + stw %r20, 0(%sr2,%r20) ssm PSW_SM_I, %r0 ldo 1(%r0),%r28 b lws_exit From 8e88c29b351ed4e09dd63f825f1c8260b0cb0ab3 Mon Sep 17 00:00:00 2001 From: Jiri Olsa <jolsa@kernel.org> Date: Sun, 23 Sep 2018 17:04:20 +0200 Subject: [PATCH 159/443] perf tools: Do not zero sample_id_all for group members Andi reported following malfunction: # perf record -e '{ref-cycles,cycles}:S' -a sleep 1 # perf script non matching sample_id_all That's because we disable sample_id_all bit for non-sampling group members. We can't do that, because it needs to be the same over the whole event list. This patch keeps it untouched again. Reported-by: Andi Kleen <andi@firstfloor.org> Tested-by: Andi Kleen <andi@firstfloor.org> Signed-off-by: Jiri Olsa <jolsa@kernel.org> Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com> Cc: Namhyung Kim <namhyung@kernel.org> Cc: Peter Zijlstra <peterz@infradead.org> Link: http://lkml.kernel.org/r/20180923150420.27327-1-jolsa@kernel.org Fixes: e9add8bac6c6 ("perf evsel: Disable write_backward for leader sampling group events") Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com> --- tools/perf/tests/attr/test-record-group-sampling | 1 - tools/perf/util/evsel.c | 1 - 2 files changed, 2 deletions(-) diff --git a/tools/perf/tests/attr/test-record-group-sampling b/tools/perf/tests/attr/test-record-group-sampling index 8a33ca4f9e1f7..f0729c454f160 100644 --- a/tools/perf/tests/attr/test-record-group-sampling +++ b/tools/perf/tests/attr/test-record-group-sampling @@ -37,4 +37,3 @@ sample_freq=0 sample_period=0 freq=0 write_backward=0 -sample_id_all=0 diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c index 6d187059a3736..d37bb1566cd9d 100644 --- a/tools/perf/util/evsel.c +++ b/tools/perf/util/evsel.c @@ -956,7 +956,6 @@ void perf_evsel__config(struct perf_evsel *evsel, struct record_opts *opts, attr->sample_freq = 0; attr->sample_period = 0; attr->write_backward = 0; - attr->sample_id_all = 0; } if (opts->no_samples) From 3fc202e81db70bf60beb9296eefd4e48d6304607 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann <arnd@arndb.de> Date: Fri, 2 Nov 2018 16:14:32 +0100 Subject: [PATCH 160/443] HID: asus: fix build warning wiht CONFIG_ASUS_WMI disabled asus_wmi_evaluate_method() is an empty dummy function when CONFIG_ASUS_WMI is disabled, or not reachable from a built-in device driver. This leads to a theoretical evaluation of an uninitialized variable that the compiler complains about, failing to check that the hardcoded return value makes this an unreachable code path: In file included from include/linux/printk.h:336, from include/linux/kernel.h:14, from include/linux/list.h:9, from include/linux/dmi.h:5, from drivers/hid/hid-asus.c:29: drivers/hid/hid-asus.c: In function 'asus_input_configured': include/linux/dynamic_debug.h:135:3: error: 'value' may be used uninitialized in this function [-Werror=maybe-uninitialized] __dynamic_dev_dbg(&descriptor, dev, fmt, \ ^~~~~~~~~~~~~~~~~ drivers/hid/hid-asus.c:359:6: note: 'value' was declared here u32 value; ^~~~~ With an extra IS_ENABLED() check, the warning goes away. Fixes: 3b692c55e58d ("HID: asus: only support backlight when it's not driven by WMI") Signed-off-by: Arnd Bergmann <arnd@arndb.de> Acked-by: Geert Uytterhoeven <geert@linux-m68k.org> Signed-off-by: Jiri Kosina <jkosina@suse.cz> --- drivers/hid/hid-asus.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/hid/hid-asus.c b/drivers/hid/hid-asus.c index dc6d6477e9611..a1fa2fc8c9b57 100644 --- a/drivers/hid/hid-asus.c +++ b/drivers/hid/hid-asus.c @@ -359,6 +359,9 @@ static bool asus_kbd_wmi_led_control_present(struct hid_device *hdev) u32 value; int ret; + if (!IS_ENABLED(CONFIG_ASUS_WMI)) + return false; + ret = asus_wmi_evaluate_method(ASUS_WMI_METHODID_DSTS2, ASUS_WMI_DEVID_KBD_BACKLIGHT, 0, &value); hid_dbg(hdev, "WMI backlight check: rc %d value %x", ret, value); From aa9b760cec2385ad408bb2e346c7f6dc1be69a79 Mon Sep 17 00:00:00 2001 From: Linus Walleij <linus.walleij@linaro.org> Date: Sun, 4 Nov 2018 11:32:47 +0100 Subject: [PATCH 161/443] HID: fix up .raw_event() documentation The documentation for the .raw_event() callback says that if the driver return 1, there will be no further processing of the event, but this is not true, the actual code in hid-core.c looks like this: if (hdrv && hdrv->raw_event && hid_match_report(hid, report)) { ret = hdrv->raw_event(hid, report, data, size); if (ret < 0) goto unlock; } ret = hid_report_raw_event(hid, type, data, size, interrupt); The only return value that has any effect on the processing is a negative error. Correct this as it seems to confuse people: I found bogus code in the Razer out-of-tree driver attempting to return 1 here. Signed-off-by: Linus Walleij <linus.walleij@linaro.org> Signed-off-by: Jiri Kosina <jkosina@suse.cz> --- include/linux/hid.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/include/linux/hid.h b/include/linux/hid.h index 2827b87590d8d..387c70df6f29c 100644 --- a/include/linux/hid.h +++ b/include/linux/hid.h @@ -722,8 +722,8 @@ struct hid_usage_id { * input will not be passed to raw_event unless hid_device_io_start is * called. * - * raw_event and event should return 0 on no action performed, 1 when no - * further processing should be done and negative on error + * raw_event and event should return negative on error, any other value will + * pass the event on to .event() typically return 0 for success. * * input_mapping shall return a negative value to completely ignore this usage * (e.g. doubled or invalid usage), zero to continue with parsing of this From 8bd66d147c88bd441178c7b4c774ae5a185f19b8 Mon Sep 17 00:00:00 2001 From: "ndesaulniers@google.com" <ndesaulniers@google.com> Date: Wed, 31 Oct 2018 12:39:01 -0700 Subject: [PATCH 162/443] include/linux/compiler*.h: define asm_volatile_goto asm_volatile_goto should also be defined for other compilers that support asm goto. Fixes commit 815f0ddb346c ("include/linux/compiler*.h: make compiler-*.h mutually exclusive"). Signed-off-by: Nick Desaulniers <ndesaulniers@google.com> Signed-off-by: Miguel Ojeda <miguel.ojeda.sandonis@gmail.com> --- include/linux/compiler_types.h | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/include/linux/compiler_types.h b/include/linux/compiler_types.h index 3439d7d0249aa..4a3f9c09c92d0 100644 --- a/include/linux/compiler_types.h +++ b/include/linux/compiler_types.h @@ -130,6 +130,10 @@ struct ftrace_likely_data { # define randomized_struct_fields_end #endif +#ifndef asm_volatile_goto +#define asm_volatile_goto(x...) asm goto(x) +#endif + /* Are two types/vars the same type (ignoring qualifiers)? */ #define __same_type(a, b) __builtin_types_compatible_p(typeof(a), typeof(b)) From 18354b422ce4ce1124277dfe8f4f094bae0102ad Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= <ville.syrjala@linux.intel.com> Date: Tue, 23 Oct 2018 21:21:02 +0300 Subject: [PATCH 163/443] drm/i915: Don't apply the 16Gb DIMM wm latency w/a to BXT/GLK MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The 16Gb DIMM w/a is not applicable to BXT or GLK. Limit it to the appropriate platforms. This was especially harsh on GLK since we don't even try to read the DIMM information on that platforms, hence valid_dimm was always false and thus we always tried to apply the w/a. Furthermore the w/a pushed the level 0 latency above the level 1 latency, which doesn't really make sense. v2: Do the check when populating is_16gb_dimm (Mahesh) Cc: Mahesh Kumar <mahesh1.kumar@intel.com> Cc: Maarten Lankhorst <maarten.lankhorst@linux.intel.com> Fixes: 86b592876cb6 ("drm/i915: Implement 16GB dimm wa for latency level-0") Signed-off-by: Ville Syrjälä <ville.syrjala@linux.intel.com> Link: https://patchwork.freedesktop.org/patch/msgid/20181023182102.31549-1-ville.syrjala@linux.intel.com Reviewed-by: Rodrigo Vivi <rodrigo.vivi@intel.com> Reviewed-by: Mahesh Kumar <mahesh1.sh.kumar@gmail.com> (cherry picked from commit 5d6f36b27d2764f3dc940606ee6b7ec5c669af3e) Signed-off-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com> --- drivers/gpu/drm/i915/i915_drv.c | 15 ++++++++------- drivers/gpu/drm/i915/i915_drv.h | 1 - drivers/gpu/drm/i915/intel_pm.c | 3 +-- 3 files changed, 9 insertions(+), 10 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c index 44e2c0f5ec502..ffdbbac4400ea 100644 --- a/drivers/gpu/drm/i915/i915_drv.c +++ b/drivers/gpu/drm/i915/i915_drv.c @@ -1175,8 +1175,6 @@ skl_dram_get_channels_info(struct drm_i915_private *dev_priv) return -EINVAL; } - dram_info->valid_dimm = true; - /* * If any of the channel is single rank channel, worst case output * will be same as if single rank memory, so consider single rank @@ -1193,8 +1191,7 @@ skl_dram_get_channels_info(struct drm_i915_private *dev_priv) return -EINVAL; } - if (ch0.is_16gb_dimm || ch1.is_16gb_dimm) - dram_info->is_16gb_dimm = true; + dram_info->is_16gb_dimm = ch0.is_16gb_dimm || ch1.is_16gb_dimm; dev_priv->dram_info.symmetric_memory = intel_is_dram_symmetric(val_ch0, val_ch1, @@ -1314,7 +1311,6 @@ bxt_get_dram_info(struct drm_i915_private *dev_priv) return -EINVAL; } - dram_info->valid_dimm = true; dram_info->valid = true; return 0; } @@ -1327,12 +1323,17 @@ intel_get_dram_info(struct drm_i915_private *dev_priv) int ret; dram_info->valid = false; - dram_info->valid_dimm = false; - dram_info->is_16gb_dimm = false; dram_info->rank = I915_DRAM_RANK_INVALID; dram_info->bandwidth_kbps = 0; dram_info->num_channels = 0; + /* + * Assume 16Gb DIMMs are present until proven otherwise. + * This is only used for the level 0 watermark latency + * w/a which does not apply to bxt/glk. + */ + dram_info->is_16gb_dimm = !IS_GEN9_LP(dev_priv); + if (INTEL_GEN(dev_priv) < 9 || IS_GEMINILAKE(dev_priv)) return; diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 8624b4bdc242d..9102571e9692d 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -1948,7 +1948,6 @@ struct drm_i915_private { struct dram_info { bool valid; - bool valid_dimm; bool is_16gb_dimm; u8 num_channels; enum dram_rank { diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index 1db9b83282750..245f0022bcfd0 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -2881,8 +2881,7 @@ static void intel_read_wm_latency(struct drm_i915_private *dev_priv, * any underrun. If not able to get Dimm info assume 16GB dimm * to avoid any underrun. */ - if (!dev_priv->dram_info.valid_dimm || - dev_priv->dram_info.is_16gb_dimm) + if (dev_priv->dram_info.is_16gb_dimm) wm[0] += 1; } else if (IS_HASWELL(dev_priv) || IS_BROADWELL(dev_priv)) { From 6503493145cba4413ecd3d4d153faeef4a1e9b85 Mon Sep 17 00:00:00 2001 From: Clint Taylor <clinton.a.taylor@intel.com> Date: Thu, 25 Oct 2018 11:52:00 -0700 Subject: [PATCH 164/443] drm/i915/hdmi: Add HDMI 2.0 audio clock recovery N values HDMI 2.0 594Mhz modes were incorrectly selecting 25.200Mhz Automatic N value mode instead of HDMI specification values. V2: Fix 88.2 Hz N value Cc: Jani Nikula <jani.nikula@linux.intel.com> Cc: stable@vger.kernel.org Signed-off-by: Clint Taylor <clinton.a.taylor@intel.com> Signed-off-by: Jani Nikula <jani.nikula@intel.com> Link: https://patchwork.freedesktop.org/patch/msgid/1540493521-1746-2-git-send-email-clinton.a.taylor@intel.com (cherry picked from commit 5a400aa3c562c4a726b4da286e63c96db905ade1) Signed-off-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com> --- drivers/gpu/drm/i915/intel_audio.c | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/drivers/gpu/drm/i915/intel_audio.c b/drivers/gpu/drm/i915/intel_audio.c index 769f3f5866611..ee3ca2de983b9 100644 --- a/drivers/gpu/drm/i915/intel_audio.c +++ b/drivers/gpu/drm/i915/intel_audio.c @@ -144,6 +144,9 @@ static const struct { /* HDMI N/CTS table */ #define TMDS_297M 297000 #define TMDS_296M 296703 +#define TMDS_594M 594000 +#define TMDS_593M 593407 + static const struct { int sample_rate; int clock; @@ -164,6 +167,20 @@ static const struct { { 176400, TMDS_297M, 18816, 247500 }, { 192000, TMDS_296M, 23296, 281250 }, { 192000, TMDS_297M, 20480, 247500 }, + { 44100, TMDS_593M, 8918, 937500 }, + { 44100, TMDS_594M, 9408, 990000 }, + { 48000, TMDS_593M, 5824, 562500 }, + { 48000, TMDS_594M, 6144, 594000 }, + { 32000, TMDS_593M, 5824, 843750 }, + { 32000, TMDS_594M, 3072, 445500 }, + { 88200, TMDS_593M, 17836, 937500 }, + { 88200, TMDS_594M, 18816, 990000 }, + { 96000, TMDS_593M, 11648, 562500 }, + { 96000, TMDS_594M, 12288, 594000 }, + { 176400, TMDS_593M, 35672, 937500 }, + { 176400, TMDS_594M, 37632, 990000 }, + { 192000, TMDS_593M, 23296, 562500 }, + { 192000, TMDS_594M, 24576, 594000 }, }; /* get AUD_CONFIG_PIXEL_CLOCK_HDMI_* value for mode */ From c58281056a8b26d5d9dc15c19859a7880835ef44 Mon Sep 17 00:00:00 2001 From: Chris Wilson <chris@chris-wilson.co.uk> Date: Thu, 25 Oct 2018 10:18:22 +0100 Subject: [PATCH 165/443] drm/i915: Mark up GTT sizes as u64 Since we use a 64b virtual GTT irrespective of the system, we want to ensure that the GTT computations remains 64b even on 32b systems, including treatment of huge virtual pages. No code generation changes on 64b: Reported-by: Sergii Romantsov <sergii.romantsov@globallogic.com> Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=108282 Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com> Cc: stable@vger.kernel.org Reviewed-by: Matthew Auld <matthew.auld@intel.com> Link: https://patchwork.freedesktop.org/patch/msgid/20181025091823.20571-1-chris@chris-wilson.co.uk (cherry picked from commit 9125963a9494253fa5a29cc1b4169885d2be7042) Signed-off-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com> --- drivers/gpu/drm/i915/i915_gem_gtt.c | 2 +- drivers/gpu/drm/i915/i915_gem_gtt.h | 6 +++--- drivers/gpu/drm/i915/selftests/huge_pages.c | 2 +- drivers/gpu/drm/i915/selftests/i915_gem_gtt.c | 6 +++--- 4 files changed, 8 insertions(+), 8 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c index 56c7f86373112..47c3025437990 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c @@ -1757,7 +1757,7 @@ static void gen6_dump_ppgtt(struct i915_hw_ppgtt *base, struct seq_file *m) if (i == 4) continue; - seq_printf(m, "\t\t(%03d, %04d) %08lx: ", + seq_printf(m, "\t\t(%03d, %04d) %08llx: ", pde, pte, (pde * GEN6_PTES + pte) * I915_GTT_PAGE_SIZE); for (i = 0; i < 4; i++) { diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.h b/drivers/gpu/drm/i915/i915_gem_gtt.h index 7e2af5f4f39bc..aa83070430361 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.h +++ b/drivers/gpu/drm/i915/i915_gem_gtt.h @@ -42,9 +42,9 @@ #include "i915_selftest.h" #include "i915_timeline.h" -#define I915_GTT_PAGE_SIZE_4K BIT(12) -#define I915_GTT_PAGE_SIZE_64K BIT(16) -#define I915_GTT_PAGE_SIZE_2M BIT(21) +#define I915_GTT_PAGE_SIZE_4K BIT_ULL(12) +#define I915_GTT_PAGE_SIZE_64K BIT_ULL(16) +#define I915_GTT_PAGE_SIZE_2M BIT_ULL(21) #define I915_GTT_PAGE_SIZE I915_GTT_PAGE_SIZE_4K #define I915_GTT_MAX_PAGE_SIZE I915_GTT_PAGE_SIZE_2M diff --git a/drivers/gpu/drm/i915/selftests/huge_pages.c b/drivers/gpu/drm/i915/selftests/huge_pages.c index 8d03f64eabd71..5c22f2c8d4cfe 100644 --- a/drivers/gpu/drm/i915/selftests/huge_pages.c +++ b/drivers/gpu/drm/i915/selftests/huge_pages.c @@ -551,7 +551,7 @@ static int igt_mock_ppgtt_misaligned_dma(void *arg) err = igt_check_page_sizes(vma); if (vma->page_sizes.gtt != I915_GTT_PAGE_SIZE_4K) { - pr_err("page_sizes.gtt=%u, expected %lu\n", + pr_err("page_sizes.gtt=%u, expected %llu\n", vma->page_sizes.gtt, I915_GTT_PAGE_SIZE_4K); err = -EINVAL; } diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c b/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c index 8e2e269db97e8..127d815136717 100644 --- a/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c @@ -1337,7 +1337,7 @@ static int igt_gtt_reserve(void *arg) GEM_BUG_ON(!drm_mm_node_allocated(&vma->node)); if (vma->node.start != total || vma->node.size != 2*I915_GTT_PAGE_SIZE) { - pr_err("i915_gem_gtt_reserve (pass 1) placement failed, found (%llx + %llx), expected (%llx + %lx)\n", + pr_err("i915_gem_gtt_reserve (pass 1) placement failed, found (%llx + %llx), expected (%llx + %llx)\n", vma->node.start, vma->node.size, total, 2*I915_GTT_PAGE_SIZE); err = -EINVAL; @@ -1386,7 +1386,7 @@ static int igt_gtt_reserve(void *arg) GEM_BUG_ON(!drm_mm_node_allocated(&vma->node)); if (vma->node.start != total || vma->node.size != 2*I915_GTT_PAGE_SIZE) { - pr_err("i915_gem_gtt_reserve (pass 2) placement failed, found (%llx + %llx), expected (%llx + %lx)\n", + pr_err("i915_gem_gtt_reserve (pass 2) placement failed, found (%llx + %llx), expected (%llx + %llx)\n", vma->node.start, vma->node.size, total, 2*I915_GTT_PAGE_SIZE); err = -EINVAL; @@ -1430,7 +1430,7 @@ static int igt_gtt_reserve(void *arg) GEM_BUG_ON(!drm_mm_node_allocated(&vma->node)); if (vma->node.start != offset || vma->node.size != 2*I915_GTT_PAGE_SIZE) { - pr_err("i915_gem_gtt_reserve (pass 3) placement failed, found (%llx + %llx), expected (%llx + %lx)\n", + pr_err("i915_gem_gtt_reserve (pass 3) placement failed, found (%llx + %llx), expected (%llx + %llx)\n", vma->node.start, vma->node.size, offset, 2*I915_GTT_PAGE_SIZE); err = -EINVAL; From 085603287452fc96376ed4888bf29f8c095d2b40 Mon Sep 17 00:00:00 2001 From: Chris Wilson <chris@chris-wilson.co.uk> Date: Thu, 25 Oct 2018 10:18:23 +0100 Subject: [PATCH 166/443] drm/i915: Compare user's 64b GTT offset even on 32b Beware mixing unsigned long constants and 64b values, as on 32b the constant will be zero extended and discard the high 32b when used as a mask! Reported-by: Sergii Romantsov <sergii.romantsov@globallogic.com> Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=108282 Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com> Cc: stable@vger.kernel.org Reviewed-by: Matthew Auld <matthew.auld@intel.com> Link: https://patchwork.freedesktop.org/patch/msgid/20181025091823.20571-2-chris@chris-wilson.co.uk (cherry picked from commit 6fc4e48f9ed46e9adff236a0c350074aafa3b7fa) Signed-off-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com> --- drivers/gpu/drm/i915/gvt/gtt.h | 1 - drivers/gpu/drm/i915/i915_gem_execbuffer.c | 2 +- drivers/gpu/drm/i915/i915_gem_gtt.h | 2 ++ 3 files changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/gvt/gtt.h b/drivers/gpu/drm/i915/gvt/gtt.h index 7a9b36176efb7..bfb6f652b09fc 100644 --- a/drivers/gpu/drm/i915/gvt/gtt.h +++ b/drivers/gpu/drm/i915/gvt/gtt.h @@ -35,7 +35,6 @@ #define _GVT_GTT_H_ #define I915_GTT_PAGE_SHIFT 12 -#define I915_GTT_PAGE_MASK (~(I915_GTT_PAGE_SIZE - 1)) struct intel_vgpu_mm; diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c index 09187286d3462..1aaccbe7e1deb 100644 --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c @@ -460,7 +460,7 @@ eb_validate_vma(struct i915_execbuffer *eb, * any non-page-aligned or non-canonical addresses. */ if (unlikely(entry->flags & EXEC_OBJECT_PINNED && - entry->offset != gen8_canonical_addr(entry->offset & PAGE_MASK))) + entry->offset != gen8_canonical_addr(entry->offset & I915_GTT_PAGE_MASK))) return -EINVAL; /* pad_to_size was once a reserved field, so sanitize it */ diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.h b/drivers/gpu/drm/i915/i915_gem_gtt.h index aa83070430361..5d2c5ba55ad83 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.h +++ b/drivers/gpu/drm/i915/i915_gem_gtt.h @@ -49,6 +49,8 @@ #define I915_GTT_PAGE_SIZE I915_GTT_PAGE_SIZE_4K #define I915_GTT_MAX_PAGE_SIZE I915_GTT_PAGE_SIZE_2M +#define I915_GTT_PAGE_MASK -I915_GTT_PAGE_SIZE + #define I915_GTT_MIN_ALIGNMENT I915_GTT_PAGE_SIZE #define I915_FENCE_REG_NONE -1 From 2c2f6e30d5f29691e3563d334ce208d3a1907f49 Mon Sep 17 00:00:00 2001 From: Rodrigo Vivi <rodrigo.vivi@intel.com> Date: Thu, 25 Oct 2018 17:56:36 -0700 Subject: [PATCH 167/443] drm/i915/glk: Remove 99% limitation. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit While checking the opportunity to add a display_gen check to allow glk and cnl to be on same bucket I noticed these FIXME cases here. So I got the confirmation from HW architect that we actually never needed this workaround. "GLK supports 2 pixel per clock, so pixel clock can be up to 2 * cdclk." So, this reverts commit 97f55ca5b662 ("drm/i915/glk: limit pixel clock to 99% of cdclk workaround") Fixes: 97f55ca5b662 ("drm/i915/glk: limit pixel clock to 99% of cdclk workaround") Cc: Ville Syrjälä <ville.syrjala@linux.intel.com> Cc: Madhav Chauhan <madhav.chauhan@intel.com> Cc: Jani Nikula <jani.nikula@intel.com> Cc: Clinton Taylor <clinton.a.taylor@intel.com> Cc: Arthur J Runyan <arthur.j.runyan@intel.com> Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com> Reviewed-by: Ville Syrjälä <ville.syrjala@linux.intel.com> Link: https://patchwork.freedesktop.org/patch/msgid/20181026005636.22274-1-rodrigo.vivi@intel.com (cherry picked from commit 42882336e62aab00278114392a16374f272a0c99) Signed-off-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com> --- drivers/gpu/drm/i915/intel_cdclk.c | 18 ++---------------- 1 file changed, 2 insertions(+), 16 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_cdclk.c b/drivers/gpu/drm/i915/intel_cdclk.c index 29075c7634280..8d74276029e62 100644 --- a/drivers/gpu/drm/i915/intel_cdclk.c +++ b/drivers/gpu/drm/i915/intel_cdclk.c @@ -2138,16 +2138,8 @@ void intel_set_cdclk(struct drm_i915_private *dev_priv, static int intel_pixel_rate_to_cdclk(struct drm_i915_private *dev_priv, int pixel_rate) { - if (INTEL_GEN(dev_priv) >= 10) + if (INTEL_GEN(dev_priv) >= 10 || IS_GEMINILAKE(dev_priv)) return DIV_ROUND_UP(pixel_rate, 2); - else if (IS_GEMINILAKE(dev_priv)) - /* - * FIXME: Avoid using a pixel clock that is more than 99% of the cdclk - * as a temporary workaround. Use a higher cdclk instead. (Note that - * intel_compute_max_dotclk() limits the max pixel clock to 99% of max - * cdclk.) - */ - return DIV_ROUND_UP(pixel_rate * 100, 2 * 99); else if (IS_GEN9(dev_priv) || IS_BROADWELL(dev_priv) || IS_HASWELL(dev_priv)) return pixel_rate; @@ -2543,14 +2535,8 @@ static int intel_compute_max_dotclk(struct drm_i915_private *dev_priv) { int max_cdclk_freq = dev_priv->max_cdclk_freq; - if (INTEL_GEN(dev_priv) >= 10) + if (INTEL_GEN(dev_priv) >= 10 || IS_GEMINILAKE(dev_priv)) return 2 * max_cdclk_freq; - else if (IS_GEMINILAKE(dev_priv)) - /* - * FIXME: Limiting to 99% as a temporary workaround. See - * intel_min_cdclk() for details. - */ - return 2 * max_cdclk_freq * 99 / 100; else if (IS_GEN9(dev_priv) || IS_BROADWELL(dev_priv) || IS_HASWELL(dev_priv)) return max_cdclk_freq; From 76271ef2638ca8e4bf2884cad664a34be0d5a42b Mon Sep 17 00:00:00 2001 From: Dhinakaran Pandiyan <dhinakaran.pandiyan@intel.com> Date: Fri, 5 Oct 2018 11:56:42 -0700 Subject: [PATCH 168/443] drm/i915: Fix VIDEO_DIP_CTL bit shifts The shifts for VSC_SELECT bits are wrong, fix it. Good thing is the definitions are unused. v2: Moves definitions in another patch (Manasi) Cc: Manasi Navare <manasi.d.navare@intel.com> Cc: Anusha Srivatsa <anusha.srivatsa@intel.com> Cc: Rodrigo Vivi <rodrigo.vivi@intel.com> Fixes: 7af2be6d54d4 ("drm/i915/icl: Add VIDEO_DIP registers") Signed-off-by: Dhinakaran Pandiyan <dhinakaran.pandiyan@intel.com> Reviewed-by: Manasi Navare <manasi.d.navare@intel.com> Link: https://patchwork.freedesktop.org/patch/msgid/20181005185643.31660-1-dhinakaran.pandiyan@intel.com (cherry picked from commit 09209662618f9fdc38b8d4da39040c8829fd2d57) Signed-off-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com> --- drivers/gpu/drm/i915/i915_reg.h | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index 7c491ea3d052a..f2d92aee721bd 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -4593,12 +4593,12 @@ enum { #define DRM_DIP_ENABLE (1 << 28) #define PSR_VSC_BIT_7_SET (1 << 27) -#define VSC_SELECT_MASK (0x3 << 26) -#define VSC_SELECT_SHIFT 26 -#define VSC_DIP_HW_HEA_DATA (0 << 26) -#define VSC_DIP_HW_HEA_SW_DATA (1 << 26) -#define VSC_DIP_HW_DATA_SW_HEA (2 << 26) -#define VSC_DIP_SW_HEA_DATA (3 << 26) +#define VSC_SELECT_MASK (0x3 << 25) +#define VSC_SELECT_SHIFT 25 +#define VSC_DIP_HW_HEA_DATA (0 << 25) +#define VSC_DIP_HW_HEA_SW_DATA (1 << 25) +#define VSC_DIP_HW_DATA_SW_HEA (2 << 25) +#define VSC_DIP_SW_HEA_DATA (3 << 25) #define VDIP_ENABLE_PPS (1 << 24) /* Panel power sequencing */ From f42f343887016330b321dd40eebc68c7292e4f1b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= <ville.syrjala@linux.intel.com> Date: Mon, 29 Oct 2018 16:00:31 +0200 Subject: [PATCH 169/443] drm/i915: Fix error handling for the NV12 fb dimensions check MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Let's not leak obj->framebuffer_references when we decide that the framebuffer domensions are not suitable for NV12. Cc: stable@vger.kernel.org Cc: Maarten Lankhorst <maarten.lankhorst@linux.intel.com> Cc: Vidya Srinivas <vidya.srinivas@intel.com> Fixes: e44134f2673c ("drm/i915: Add NV12 support to intel_framebuffer_init") Signed-off-by: Ville Syrjälä <ville.syrjala@linux.intel.com> Link: https://patchwork.freedesktop.org/patch/msgid/20181029140031.11765-1-ville.syrjala@linux.intel.com Reviewed-by: Matt Roper <matthew.d.roper@intel.com> (cherry picked from commit 3b90946fcb6f13b65888c380461793a9dea9d1f4) Signed-off-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com> --- drivers/gpu/drm/i915/intel_display.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index 9741cc419e1b2..b8dfdbc9ca1f6 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -14646,7 +14646,7 @@ static int intel_framebuffer_init(struct intel_framebuffer *intel_fb, fb->height < SKL_MIN_YUV_420_SRC_H || (fb->width % 4) != 0 || (fb->height % 4) != 0)) { DRM_DEBUG_KMS("src dimensions not correct for NV12\n"); - return -EINVAL; + goto err; } for (i = 0; i < fb->format->num_planes; i++) { From e528c2affcf216b3d02b22004895cb678769629b Mon Sep 17 00:00:00 2001 From: Manasi Navare <manasi.d.navare@intel.com> Date: Tue, 23 Oct 2018 12:12:47 -0700 Subject: [PATCH 170/443] drm/i915/icl: Fix the macros for DFLEXDPMLE register bits MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This patch fixes the macros used for defining the DFLEXDPMLE register bit fields. This accounts for changes in the spec. Fixes: a2bc69a1a9d6 ("drm/i915/icl: Add register definition for DFLEXDPMLE") Cc: Animesh Manna <animesh.manna@intel.com> Cc: Paulo Zanoni <paulo.r.zanoni@intel.com> Cc: Jose Roberto de Souza <jose.souza@intel.com> Cc: <stable@vger.kernel.org> # v4.19+ Signed-off-by: Manasi Navare <manasi.d.navare@intel.com> Reviewed-by: José Roberto de Souza <jose.souza@intel.com> Link: https://patchwork.freedesktop.org/patch/msgid/20181023191248.26418-1-manasi.d.navare@intel.com (cherry picked from commit b4335ec0a3ee6229a570755f8fb95dc8a7c694f2) Signed-off-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com> --- drivers/gpu/drm/i915/i915_reg.h | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index f2d92aee721bd..e31c27e45734e 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -2095,8 +2095,12 @@ enum i915_power_well_id { /* ICL PHY DFLEX registers */ #define PORT_TX_DFLEXDPMLE1 _MMIO(0x1638C0) -#define DFLEXDPMLE1_DPMLETC_MASK(n) (0xf << (4 * (n))) -#define DFLEXDPMLE1_DPMLETC(n, x) ((x) << (4 * (n))) +#define DFLEXDPMLE1_DPMLETC_MASK(tc_port) (0xf << (4 * (tc_port))) +#define DFLEXDPMLE1_DPMLETC_ML0(tc_port) (1 << (4 * (tc_port))) +#define DFLEXDPMLE1_DPMLETC_ML1_0(tc_port) (3 << (4 * (tc_port))) +#define DFLEXDPMLE1_DPMLETC_ML3(tc_port) (8 << (4 * (tc_port))) +#define DFLEXDPMLE1_DPMLETC_ML3_2(tc_port) (12 << (4 * (tc_port))) +#define DFLEXDPMLE1_DPMLETC_ML3_0(tc_port) (15 << (4 * (tc_port))) /* BXT PHY Ref registers */ #define _PORT_REF_DW3_A 0x16218C From 0014868b9c3c1dda1de6711cf58c3486fb422d07 Mon Sep 17 00:00:00 2001 From: Chris Wilson <chris@chris-wilson.co.uk> Date: Fri, 2 Nov 2018 16:12:09 +0000 Subject: [PATCH 171/443] drm/i915: Mark pin flags as u64 Since the flags are being used to operate on a u64 variable, they too need to be marked as such so that the inverses are full width (and not zero extended on 32b kernels and bdw+). Reported-by: Sergii Romantsov <sergii.romantsov@globallogic.com> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> Cc: stable@vger.kernel.org Reviewed-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com> Reviewed-by: Michal Wajdeczko <michal.wajdeczko@intel.com> Link: https://patchwork.freedesktop.org/patch/msgid/20181102161232.17742-2-chris@chris-wilson.co.uk (cherry picked from commit 83b466b1dc5f0b4d33f0a901e8b00197a8f3582d) Signed-off-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com> --- drivers/gpu/drm/i915/i915_gem_gtt.h | 28 ++++++++++++++-------------- 1 file changed, 14 insertions(+), 14 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.h b/drivers/gpu/drm/i915/i915_gem_gtt.h index 5d2c5ba55ad83..28039290655cb 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.h +++ b/drivers/gpu/drm/i915/i915_gem_gtt.h @@ -661,20 +661,20 @@ int i915_gem_gtt_insert(struct i915_address_space *vm, u64 start, u64 end, unsigned int flags); /* Flags used by pin/bind&friends. */ -#define PIN_NONBLOCK BIT(0) -#define PIN_MAPPABLE BIT(1) -#define PIN_ZONE_4G BIT(2) -#define PIN_NONFAULT BIT(3) -#define PIN_NOEVICT BIT(4) - -#define PIN_MBZ BIT(5) /* I915_VMA_PIN_OVERFLOW */ -#define PIN_GLOBAL BIT(6) /* I915_VMA_GLOBAL_BIND */ -#define PIN_USER BIT(7) /* I915_VMA_LOCAL_BIND */ -#define PIN_UPDATE BIT(8) - -#define PIN_HIGH BIT(9) -#define PIN_OFFSET_BIAS BIT(10) -#define PIN_OFFSET_FIXED BIT(11) +#define PIN_NONBLOCK BIT_ULL(0) +#define PIN_MAPPABLE BIT_ULL(1) +#define PIN_ZONE_4G BIT_ULL(2) +#define PIN_NONFAULT BIT_ULL(3) +#define PIN_NOEVICT BIT_ULL(4) + +#define PIN_MBZ BIT_ULL(5) /* I915_VMA_PIN_OVERFLOW */ +#define PIN_GLOBAL BIT_ULL(6) /* I915_VMA_GLOBAL_BIND */ +#define PIN_USER BIT_ULL(7) /* I915_VMA_LOCAL_BIND */ +#define PIN_UPDATE BIT_ULL(8) + +#define PIN_HIGH BIT_ULL(9) +#define PIN_OFFSET_BIAS BIT_ULL(10) +#define PIN_OFFSET_FIXED BIT_ULL(11) #define PIN_OFFSET_MASK (-I915_GTT_PAGE_SIZE) #endif From 6a8915d0f8cf323e1beb792a33095cf652db4056 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= <ville.syrjala@linux.intel.com> Date: Mon, 5 Nov 2018 21:46:04 +0200 Subject: [PATCH 172/443] drm/i915: Don't oops during modeset shutdown after lpe audio deinit MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We deinit the lpe audio device before we call drm_atomic_helper_shutdown(), which means the platform device may already be gone when it comes time to shut down the crtc. As we don't know when the last reference to the platform device gets dropped by the audio driver we can't assume that the device and its data are still around when turning off the crtc. Mark the platform device as gone as soon as we do the audio deinit. Cc: stable@vger.kernel.org Signed-off-by: Ville Syrjälä <ville.syrjala@linux.intel.com> Link: https://patchwork.freedesktop.org/patch/msgid/20181105194604.6994-1-ville.syrjala@linux.intel.com Reviewed-by: Chris Wilson <chris@chris-wilson.co.uk> (cherry picked from commit f45a7977d1140c11f334e01a9f77177ed68e3bfa) Signed-off-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com> --- drivers/gpu/drm/i915/intel_lpe_audio.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/intel_lpe_audio.c b/drivers/gpu/drm/i915/intel_lpe_audio.c index cdf19553ffacd..5d5336fbe7b05 100644 --- a/drivers/gpu/drm/i915/intel_lpe_audio.c +++ b/drivers/gpu/drm/i915/intel_lpe_audio.c @@ -297,8 +297,10 @@ void intel_lpe_audio_teardown(struct drm_i915_private *dev_priv) lpe_audio_platdev_destroy(dev_priv); irq_free_desc(dev_priv->lpe_audio.irq); -} + dev_priv->lpe_audio.irq = -1; + dev_priv->lpe_audio.platdev = NULL; +} /** * intel_lpe_audio_notify() - notify lpe audio event From 506481b20e818db40b6198815904ecd2d6daee64 Mon Sep 17 00:00:00 2001 From: Robbie Ko <robbieko@synology.com> Date: Tue, 30 Oct 2018 18:04:04 +0800 Subject: [PATCH 173/443] Btrfs: fix cur_offset in the error case for nocow When the cow_file_range fails, the related resources are unlocked according to the range [start..end), so the unlock cannot be repeated in run_delalloc_nocow. In some cases (e.g. cur_offset <= end && cow_start != -1), cur_offset is not updated correctly, so move the cur_offset update before cow_file_range. kernel BUG at mm/page-writeback.c:2663! Internal error: Oops - BUG: 0 [#1] SMP CPU: 3 PID: 31525 Comm: kworker/u8:7 Tainted: P O Hardware name: Realtek_RTD1296 (DT) Workqueue: writeback wb_workfn (flush-btrfs-1) task: ffffffc076db3380 ti: ffffffc02e9ac000 task.ti: ffffffc02e9ac000 PC is at clear_page_dirty_for_io+0x1bc/0x1e8 LR is at clear_page_dirty_for_io+0x14/0x1e8 pc : [<ffffffc00033c91c>] lr : [<ffffffc00033c774>] pstate: 40000145 sp : ffffffc02e9af4f0 Process kworker/u8:7 (pid: 31525, stack limit = 0xffffffc02e9ac020) Call trace: [<ffffffc00033c91c>] clear_page_dirty_for_io+0x1bc/0x1e8 [<ffffffbffc514674>] extent_clear_unlock_delalloc+0x1e4/0x210 [btrfs] [<ffffffbffc4fb168>] run_delalloc_nocow+0x3b8/0x948 [btrfs] [<ffffffbffc4fb948>] run_delalloc_range+0x250/0x3a8 [btrfs] [<ffffffbffc514c0c>] writepage_delalloc.isra.21+0xbc/0x1d8 [btrfs] [<ffffffbffc516048>] __extent_writepage+0xe8/0x248 [btrfs] [<ffffffbffc51630c>] extent_write_cache_pages.isra.17+0x164/0x378 [btrfs] [<ffffffbffc5185a8>] extent_writepages+0x48/0x68 [btrfs] [<ffffffbffc4f5828>] btrfs_writepages+0x20/0x30 [btrfs] [<ffffffc00033d758>] do_writepages+0x30/0x88 [<ffffffc0003ba0f4>] __writeback_single_inode+0x34/0x198 [<ffffffc0003ba6c4>] writeback_sb_inodes+0x184/0x3c0 [<ffffffc0003ba96c>] __writeback_inodes_wb+0x6c/0xc0 [<ffffffc0003bac20>] wb_writeback+0x1b8/0x1c0 [<ffffffc0003bb0f0>] wb_workfn+0x150/0x250 [<ffffffc0002b0014>] process_one_work+0x1dc/0x388 [<ffffffc0002b02f0>] worker_thread+0x130/0x500 [<ffffffc0002b6344>] kthread+0x10c/0x110 [<ffffffc000284590>] ret_from_fork+0x10/0x40 Code: d503201f a9025bb5 a90363b7 f90023b9 (d4210000) CC: stable@vger.kernel.org # 4.4+ Reviewed-by: Filipe Manana <fdmanana@suse.com> Signed-off-by: Robbie Ko <robbieko@synology.com> Signed-off-by: David Sterba <dsterba@suse.com> --- fs/btrfs/inode.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index f4d31fd62eed7..55761b1519f57 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -1531,12 +1531,11 @@ static noinline int run_delalloc_nocow(struct inode *inode, } btrfs_release_path(path); - if (cur_offset <= end && cow_start == (u64)-1) { + if (cur_offset <= end && cow_start == (u64)-1) cow_start = cur_offset; - cur_offset = end; - } if (cow_start != (u64)-1) { + cur_offset = end; ret = cow_file_range(inode, locked_page, cow_start, end, end, page_started, nr_written, 1, NULL); if (ret) From fcd5e74288f7d36991b1f0fb96b8c57079645e38 Mon Sep 17 00:00:00 2001 From: Lu Fengqi <lufq.fnst@cn.fujitsu.com> Date: Wed, 24 Oct 2018 20:24:03 +0800 Subject: [PATCH 174/443] btrfs: fix pinned underflow after transaction aborted When running generic/475, we may get the following warning in dmesg: [ 6902.102154] WARNING: CPU: 3 PID: 18013 at fs/btrfs/extent-tree.c:9776 btrfs_free_block_groups+0x2af/0x3b0 [btrfs] [ 6902.109160] CPU: 3 PID: 18013 Comm: umount Tainted: G W O 4.19.0-rc8+ #8 [ 6902.110971] Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS 0.0.0 02/06/2015 [ 6902.112857] RIP: 0010:btrfs_free_block_groups+0x2af/0x3b0 [btrfs] [ 6902.118921] RSP: 0018:ffffc9000459bdb0 EFLAGS: 00010286 [ 6902.120315] RAX: ffff880175050bb0 RBX: ffff8801124a8000 RCX: 0000000000170007 [ 6902.121969] RDX: 0000000000000002 RSI: 0000000000170007 RDI: ffffffff8125fb74 [ 6902.123716] RBP: ffff880175055d10 R08: 0000000000000000 R09: 0000000000000000 [ 6902.125417] R10: 0000000000000000 R11: 0000000000000000 R12: ffff880175055d88 [ 6902.127129] R13: ffff880175050bb0 R14: 0000000000000000 R15: dead000000000100 [ 6902.129060] FS: 00007f4507223780(0000) GS:ffff88017ba00000(0000) knlGS:0000000000000000 [ 6902.130996] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 6902.132558] CR2: 00005623599cac78 CR3: 000000014b700001 CR4: 00000000003606e0 [ 6902.134270] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [ 6902.135981] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 [ 6902.137836] Call Trace: [ 6902.138939] close_ctree+0x171/0x330 [btrfs] [ 6902.140181] ? kthread_stop+0x146/0x1f0 [ 6902.141277] generic_shutdown_super+0x6c/0x100 [ 6902.142517] kill_anon_super+0x14/0x30 [ 6902.143554] btrfs_kill_super+0x13/0x100 [btrfs] [ 6902.144790] deactivate_locked_super+0x2f/0x70 [ 6902.146014] cleanup_mnt+0x3b/0x70 [ 6902.147020] task_work_run+0x9e/0xd0 [ 6902.148036] do_syscall_64+0x470/0x600 [ 6902.149142] ? trace_hardirqs_off_thunk+0x1a/0x1c [ 6902.150375] entry_SYSCALL_64_after_hwframe+0x49/0xbe [ 6902.151640] RIP: 0033:0x7f45077a6a7b [ 6902.157324] RSP: 002b:00007ffd589f3e68 EFLAGS: 00000246 ORIG_RAX: 00000000000000a6 [ 6902.159187] RAX: 0000000000000000 RBX: 000055e8eec732b0 RCX: 00007f45077a6a7b [ 6902.160834] RDX: 0000000000000001 RSI: 0000000000000000 RDI: 000055e8eec73490 [ 6902.162526] RBP: 0000000000000000 R08: 000055e8eec734b0 R09: 00007ffd589f26c0 [ 6902.164141] R10: 0000000000000000 R11: 0000000000000246 R12: 000055e8eec73490 [ 6902.165815] R13: 00007f4507ac61a4 R14: 0000000000000000 R15: 00007ffd589f40d8 [ 6902.167553] irq event stamp: 0 [ 6902.168998] hardirqs last enabled at (0): [<0000000000000000>] (null) [ 6902.170731] hardirqs last disabled at (0): [<ffffffff810cd810>] copy_process.part.55+0x3b0/0x1f00 [ 6902.172773] softirqs last enabled at (0): [<ffffffff810cd810>] copy_process.part.55+0x3b0/0x1f00 [ 6902.174671] softirqs last disabled at (0): [<0000000000000000>] (null) [ 6902.176407] ---[ end trace 463138c2986b275c ]--- [ 6902.177636] BTRFS info (device dm-3): space_info 4 has 273465344 free, is not full [ 6902.179453] BTRFS info (device dm-3): space_info total=276824064, used=4685824, pinned=18446744073708158976, reserved=0, may_use=0, readonly=65536 In the above line there's "pinned=18446744073708158976" which is an unsigned u64 value of -1392640, an obvious underflow. When transaction_kthread is running cleanup_transaction(), another fsstress is running btrfs_commit_transaction(). The btrfs_finish_extent_commit() may get the same range as btrfs_destroy_pinned_extent() got, which causes the pinned underflow. Fixes: d4b450cd4b33 ("Btrfs: fix race between transaction commit and empty block group removal") CC: stable@vger.kernel.org # 4.4+ Reviewed-by: Josef Bacik <josef@toxicpanda.com> Signed-off-by: Lu Fengqi <lufq.fnst@cn.fujitsu.com> Reviewed-by: David Sterba <dsterba@suse.com> Signed-off-by: David Sterba <dsterba@suse.com> --- fs/btrfs/disk-io.c | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index b0ab41da91d12..00ee5e37e9897 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -4359,13 +4359,23 @@ static int btrfs_destroy_pinned_extent(struct btrfs_fs_info *fs_info, unpin = pinned_extents; again: while (1) { + /* + * The btrfs_finish_extent_commit() may get the same range as + * ours between find_first_extent_bit and clear_extent_dirty. + * Hence, hold the unused_bg_unpin_mutex to avoid double unpin + * the same extent range. + */ + mutex_lock(&fs_info->unused_bg_unpin_mutex); ret = find_first_extent_bit(unpin, 0, &start, &end, EXTENT_DIRTY, NULL); - if (ret) + if (ret) { + mutex_unlock(&fs_info->unused_bg_unpin_mutex); break; + } clear_extent_dirty(unpin, start, end); btrfs_error_unpin_extent_range(fs_info, start, end); + mutex_unlock(&fs_info->unused_bg_unpin_mutex); cond_resched(); } From 008c6753f7e070c77c70d708a6bf0255b4381763 Mon Sep 17 00:00:00 2001 From: Filipe Manana <fdmanana@suse.com> Date: Mon, 29 Oct 2018 09:42:06 +0000 Subject: [PATCH 175/443] Btrfs: fix missing data checksums after a ranged fsync (msync) Recently we got a massive simplification for fsync, where for the fast path we no longer log new extents while their respective ordered extents are still running. However that simplification introduced a subtle regression for the case where we use a ranged fsync (msync). Consider the following example: CPU 0 CPU 1 mmap write to range [2Mb, 4Mb[ mmap write to range [512Kb, 1Mb[ msync range [512K, 1Mb[ --> triggers fast fsync (BTRFS_INODE_NEEDS_FULL_SYNC not set) --> creates extent map A for this range and adds it to list of modified extents --> starts ordered extent A for this range --> waits for it to complete writeback triggered for range [2Mb, 4Mb[ --> create extent map B and adds it to the list of modified extents --> creates ordered extent B --> start looking for and logging modified extents --> logs extent maps A and B --> finds checksums for extent A in the csum tree, but not for extent B fsync (msync) finishes --> ordered extent B finishes and its checksums are added to the csum tree <power cut> After replaying the log, we have the extent covering the range [2Mb, 4Mb[ but do not have the data checksum items covering that file range. This happens because at the very beginning of an fsync (btrfs_sync_file()) we start and wait for IO in the given range [512Kb, 1Mb[ and therefore wait for any ordered extents in that range to complete before we start logging the extents. However if right before we start logging the extent in our range [512Kb, 1Mb[, writeback is started for any other dirty range, such as the range [2Mb, 4Mb[ due to memory pressure or a concurrent fsync or msync (btrfs_sync_file() starts writeback before acquiring the inode's lock), an ordered extent is created for that other range and a new extent map is created to represent that range and added to the inode's list of modified extents. That means that we will see that other extent in that list when collecting extents for logging (done at btrfs_log_changed_extents()) and log the extent before the respective ordered extent finishes - namely before the checksum items are added to the checksums tree, which is where log_extent_csums() looks for the checksums, therefore making us log an extent without logging its checksums. Before that massive simplification of fsync, this wasn't a problem because besides looking for checkums in the checksums tree, we also looked for them in any ordered extent still running. The consequence of data checksums missing for a file range is that users attempting to read the affected file range will get -EIO errors and dmesg reports the following: [10188.358136] BTRFS info (device sdc): no csum found for inode 297 start 57344 [10188.359278] BTRFS warning (device sdc): csum failed root 5 ino 297 off 57344 csum 0x98f94189 expected csum 0x00000000 mirror 1 So fix this by skipping extents outside of our logging range at btrfs_log_changed_extents() and leaving them on the list of modified extents so that any subsequent ranged fsync may collect them if needed. Also, if we find a hole extent outside of the range still log it, just to prevent having gaps between extent items after replaying the log, otherwise fsck will complain when we are not using the NO_HOLES feature (fstest btrfs/056 triggers such case). Fixes: e7175a692765 ("btrfs: remove the wait ordered logic in the log_one_extent path") CC: stable@vger.kernel.org # 4.19+ Reviewed-by: Josef Bacik <josef@toxicpanda.com> Signed-off-by: Filipe Manana <fdmanana@suse.com> Signed-off-by: David Sterba <dsterba@suse.com> --- fs/btrfs/tree-log.c | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c index e07f3376b7dfc..a5ce99a6c9365 100644 --- a/fs/btrfs/tree-log.c +++ b/fs/btrfs/tree-log.c @@ -4396,6 +4396,23 @@ static int btrfs_log_changed_extents(struct btrfs_trans_handle *trans, logged_end = end; list_for_each_entry_safe(em, n, &tree->modified_extents, list) { + /* + * Skip extents outside our logging range. It's important to do + * it for correctness because if we don't ignore them, we may + * log them before their ordered extent completes, and therefore + * we could log them without logging their respective checksums + * (the checksum items are added to the csum tree at the very + * end of btrfs_finish_ordered_io()). Also leave such extents + * outside of our range in the list, since we may have another + * ranged fsync in the near future that needs them. If an extent + * outside our range corresponds to a hole, log it to avoid + * leaving gaps between extents (fsck will complain when we are + * not using the NO_HOLES feature). + */ + if ((em->start > end || em->start + em->len <= start) && + em->block_start != EXTENT_MAP_HOLE) + continue; + list_del_init(&em->list); /* * Just an arbitrary number, this can be really CPU intensive From 761333f2f50ccc887aa9957ae829300262c0d15b Mon Sep 17 00:00:00 2001 From: Shaokun Zhang <zhangshaokun@hisilicon.com> Date: Mon, 5 Nov 2018 18:49:09 +0800 Subject: [PATCH 176/443] btrfs: tree-checker: Fix misleading group system information block_group_err shows the group system as a decimal value with a '0x' prefix, which is somewhat misleading. Fix it to print hexadecimal, as was intended. Fixes: fce466eab7ac6 ("btrfs: tree-checker: Verify block_group_item") CC: stable@vger.kernel.org # 4.19+ Reviewed-by: Nikolay Borisov <nborisov@suse.com> Reviewed-by: Qu Wenruo <wqu@suse.com> Signed-off-by: Shaokun Zhang <zhangshaokun@hisilicon.com> Reviewed-by: David Sterba <dsterba@suse.com> Signed-off-by: David Sterba <dsterba@suse.com> --- fs/btrfs/tree-checker.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/btrfs/tree-checker.c b/fs/btrfs/tree-checker.c index cab0b1f1f7417..efcf89a8ba44c 100644 --- a/fs/btrfs/tree-checker.c +++ b/fs/btrfs/tree-checker.c @@ -440,7 +440,7 @@ static int check_block_group_item(struct btrfs_fs_info *fs_info, type != (BTRFS_BLOCK_GROUP_METADATA | BTRFS_BLOCK_GROUP_DATA)) { block_group_err(fs_info, leaf, slot, -"invalid type, have 0x%llx (%lu bits set) expect either 0x%llx, 0x%llx, 0x%llu or 0x%llx", +"invalid type, have 0x%llx (%lu bits set) expect either 0x%llx, 0x%llx, 0x%llx or 0x%llx", type, hweight64(type), BTRFS_BLOCK_GROUP_DATA, BTRFS_BLOCK_GROUP_METADATA, BTRFS_BLOCK_GROUP_SYSTEM, From 7e17916b35797396f681a3270245fd29c1e4c250 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann <arnd@arndb.de> Date: Sat, 3 Nov 2018 16:39:28 +0100 Subject: [PATCH 177/443] btrfs: avoid link error with CONFIG_NO_AUTO_INLINE Note: this patch fixes a problem in a feature outside of btrfs ("kernel hacking: add a config option to disable compiler auto-inlining") and is applied ahead of time due to cross-subsystem dependencies. On 32-bit ARM with gcc-8, I see a link error with the addition of the CONFIG_NO_AUTO_INLINE option: fs/btrfs/super.o: In function `btrfs_statfs': super.c:(.text+0x67b8): undefined reference to `__aeabi_uldivmod' super.c:(.text+0x67fc): undefined reference to `__aeabi_uldivmod' super.c:(.text+0x6858): undefined reference to `__aeabi_uldivmod' super.c:(.text+0x6920): undefined reference to `__aeabi_uldivmod' super.c:(.text+0x693c): undefined reference to `__aeabi_uldivmod' fs/btrfs/super.o:super.c:(.text+0x6958): more undefined references to `__aeabi_uldivmod' follow So far this is the only file that shows the behavior, so I'd propose to just work around it by marking the functions as 'static inline' that normally get inlined here. The reference to __aeabi_uldivmod comes from a div_u64() which has an optimization for a constant division that uses a straight '/' operator when the result should be known to the compiler. My interpretation is that as we turn off inlining, gcc still expects the result to be constant but fails to use that constant value. Link: https://lkml.kernel.org/r/20181103153941.1881966-1-arnd@arndb.de Reviewed-by: Nikolay Borisov <nborisov@suse.com> Reviewed-by: Changbin Du <changbin.du@gmail.com> Signed-off-by: Arnd Bergmann <arnd@arndb.de> [ add the note ] Signed-off-by: David Sterba <dsterba@suse.com> --- fs/btrfs/super.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index b362b45dd7578..cbc9d0d2c12de 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -1916,7 +1916,7 @@ static int btrfs_remount(struct super_block *sb, int *flags, char *data) } /* Used to sort the devices by max_avail(descending sort) */ -static int btrfs_cmp_device_free_bytes(const void *dev_info1, +static inline int btrfs_cmp_device_free_bytes(const void *dev_info1, const void *dev_info2) { if (((struct btrfs_device_info *)dev_info1)->max_avail > @@ -1945,8 +1945,8 @@ static inline void btrfs_descending_sort_devices( * The helper to calc the free space on the devices that can be used to store * file data. */ -static int btrfs_calc_avail_data_space(struct btrfs_fs_info *fs_info, - u64 *free_bytes) +static inline int btrfs_calc_avail_data_space(struct btrfs_fs_info *fs_info, + u64 *free_bytes) { struct btrfs_device_info *devices_info; struct btrfs_fs_devices *fs_devices = fs_info->fs_devices; From 4222ea7100c0e37adace2790c8822758bbeee179 Mon Sep 17 00:00:00 2001 From: Filipe Manana <fdmanana@suse.com> Date: Wed, 24 Oct 2018 10:13:03 +0100 Subject: [PATCH 178/443] Btrfs: fix deadlock on tree root leaf when finding free extent When we are writing out a free space cache, during the transaction commit phase, we can end up in a deadlock which results in a stack trace like the following: schedule+0x28/0x80 btrfs_tree_read_lock+0x8e/0x120 [btrfs] ? finish_wait+0x80/0x80 btrfs_read_lock_root_node+0x2f/0x40 [btrfs] btrfs_search_slot+0xf6/0x9f0 [btrfs] ? evict_refill_and_join+0xd0/0xd0 [btrfs] ? inode_insert5+0x119/0x190 btrfs_lookup_inode+0x3a/0xc0 [btrfs] ? kmem_cache_alloc+0x166/0x1d0 btrfs_iget+0x113/0x690 [btrfs] __lookup_free_space_inode+0xd8/0x150 [btrfs] lookup_free_space_inode+0x5b/0xb0 [btrfs] load_free_space_cache+0x7c/0x170 [btrfs] ? cache_block_group+0x72/0x3b0 [btrfs] cache_block_group+0x1b3/0x3b0 [btrfs] ? finish_wait+0x80/0x80 find_free_extent+0x799/0x1010 [btrfs] btrfs_reserve_extent+0x9b/0x180 [btrfs] btrfs_alloc_tree_block+0x1b3/0x4f0 [btrfs] __btrfs_cow_block+0x11d/0x500 [btrfs] btrfs_cow_block+0xdc/0x180 [btrfs] btrfs_search_slot+0x3bd/0x9f0 [btrfs] btrfs_lookup_inode+0x3a/0xc0 [btrfs] ? kmem_cache_alloc+0x166/0x1d0 btrfs_update_inode_item+0x46/0x100 [btrfs] cache_save_setup+0xe4/0x3a0 [btrfs] btrfs_start_dirty_block_groups+0x1be/0x480 [btrfs] btrfs_commit_transaction+0xcb/0x8b0 [btrfs] At cache_save_setup() we need to update the inode item of a block group's cache which is located in the tree root (fs_info->tree_root), which means that it may result in COWing a leaf from that tree. If that happens we need to find a free metadata extent and while looking for one, if we find a block group which was not cached yet we attempt to load its cache by calling cache_block_group(). However this function will try to load the inode of the free space cache, which requires finding the matching inode item in the tree root - if that inode item is located in the same leaf as the inode item of the space cache we are updating at cache_save_setup(), we end up in a deadlock, since we try to obtain a read lock on the same extent buffer that we previously write locked. So fix this by using the tree root's commit root when searching for a block group's free space cache inode item when we are attempting to load a free space cache. This is safe since block groups once loaded stay in memory forever, as well as their caches, so after they are first loaded we will never need to read their inode items again. For new block groups, once they are created they get their ->cached field set to BTRFS_CACHE_FINISHED meaning we will not need to read their inode item. Reported-by: Andrew Nelson <andrew.s.nelson@gmail.com> Link: https://lore.kernel.org/linux-btrfs/CAPTELenq9x5KOWuQ+fa7h1r3nsJG8vyiTH8+ifjURc_duHh2Wg@mail.gmail.com/ Fixes: 9d66e233c704 ("Btrfs: load free space cache if it exists") Tested-by: Andrew Nelson <andrew.s.nelson@gmail.com> Reviewed-by: Josef Bacik <josef@toxicpanda.com> Signed-off-by: Filipe Manana <fdmanana@suse.com> Signed-off-by: David Sterba <dsterba@suse.com> --- fs/btrfs/ctree.h | 3 +++ fs/btrfs/free-space-cache.c | 22 +++++++++++++++++++++- fs/btrfs/inode.c | 32 ++++++++++++++++++++++---------- 3 files changed, 46 insertions(+), 11 deletions(-) diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 68ca41dbbef38..f7f955109d8b5 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -3163,6 +3163,9 @@ void btrfs_destroy_inode(struct inode *inode); int btrfs_drop_inode(struct inode *inode); int __init btrfs_init_cachep(void); void __cold btrfs_destroy_cachep(void); +struct inode *btrfs_iget_path(struct super_block *s, struct btrfs_key *location, + struct btrfs_root *root, int *new, + struct btrfs_path *path); struct inode *btrfs_iget(struct super_block *s, struct btrfs_key *location, struct btrfs_root *root, int *was_new); struct extent_map *btrfs_get_extent(struct btrfs_inode *inode, diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c index 4ba0aedc878bd..74aa552f47930 100644 --- a/fs/btrfs/free-space-cache.c +++ b/fs/btrfs/free-space-cache.c @@ -75,7 +75,8 @@ static struct inode *__lookup_free_space_inode(struct btrfs_root *root, * sure NOFS is set to keep us from deadlocking. */ nofs_flag = memalloc_nofs_save(); - inode = btrfs_iget(fs_info->sb, &location, root, NULL); + inode = btrfs_iget_path(fs_info->sb, &location, root, NULL, path); + btrfs_release_path(path); memalloc_nofs_restore(nofs_flag); if (IS_ERR(inode)) return inode; @@ -838,6 +839,25 @@ int load_free_space_cache(struct btrfs_fs_info *fs_info, path->search_commit_root = 1; path->skip_locking = 1; + /* + * We must pass a path with search_commit_root set to btrfs_iget in + * order to avoid a deadlock when allocating extents for the tree root. + * + * When we are COWing an extent buffer from the tree root, when looking + * for a free extent, at extent-tree.c:find_free_extent(), we can find + * block group without its free space cache loaded. When we find one + * we must load its space cache which requires reading its free space + * cache's inode item from the root tree. If this inode item is located + * in the same leaf that we started COWing before, then we end up in + * deadlock on the extent buffer (trying to read lock it when we + * previously write locked it). + * + * It's safe to read the inode item using the commit root because + * block groups, once loaded, stay in memory forever (until they are + * removed) as well as their space caches once loaded. New block groups + * once created get their ->cached field set to BTRFS_CACHE_FINISHED so + * we will never try to read their inode item while the fs is mounted. + */ inode = lookup_free_space_inode(fs_info, block_group, path); if (IS_ERR(inode)) { btrfs_free_path(path); diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 55761b1519f57..e71daadd2f75a 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -3569,10 +3569,11 @@ static noinline int acls_after_inode_item(struct extent_buffer *leaf, /* * read an inode from the btree into the in-memory inode */ -static int btrfs_read_locked_inode(struct inode *inode) +static int btrfs_read_locked_inode(struct inode *inode, + struct btrfs_path *in_path) { struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb); - struct btrfs_path *path; + struct btrfs_path *path = in_path; struct extent_buffer *leaf; struct btrfs_inode_item *inode_item; struct btrfs_root *root = BTRFS_I(inode)->root; @@ -3588,15 +3589,18 @@ static int btrfs_read_locked_inode(struct inode *inode) if (!ret) filled = true; - path = btrfs_alloc_path(); - if (!path) - return -ENOMEM; + if (!path) { + path = btrfs_alloc_path(); + if (!path) + return -ENOMEM; + } memcpy(&location, &BTRFS_I(inode)->location, sizeof(location)); ret = btrfs_lookup_inode(NULL, root, path, &location, 0); if (ret) { - btrfs_free_path(path); + if (path != in_path) + btrfs_free_path(path); return ret; } @@ -3721,7 +3725,8 @@ static int btrfs_read_locked_inode(struct inode *inode) btrfs_ino(BTRFS_I(inode)), root->root_key.objectid, ret); } - btrfs_free_path(path); + if (path != in_path) + btrfs_free_path(path); if (!maybe_acls) cache_no_acl(inode); @@ -5643,8 +5648,9 @@ static struct inode *btrfs_iget_locked(struct super_block *s, /* Get an inode object given its location and corresponding root. * Returns in *is_new if the inode was read from disk */ -struct inode *btrfs_iget(struct super_block *s, struct btrfs_key *location, - struct btrfs_root *root, int *new) +struct inode *btrfs_iget_path(struct super_block *s, struct btrfs_key *location, + struct btrfs_root *root, int *new, + struct btrfs_path *path) { struct inode *inode; @@ -5655,7 +5661,7 @@ struct inode *btrfs_iget(struct super_block *s, struct btrfs_key *location, if (inode->i_state & I_NEW) { int ret; - ret = btrfs_read_locked_inode(inode); + ret = btrfs_read_locked_inode(inode, path); if (!ret) { inode_tree_add(inode); unlock_new_inode(inode); @@ -5677,6 +5683,12 @@ struct inode *btrfs_iget(struct super_block *s, struct btrfs_key *location, return inode; } +struct inode *btrfs_iget(struct super_block *s, struct btrfs_key *location, + struct btrfs_root *root, int *new) +{ + return btrfs_iget_path(s, location, root, new, NULL); +} + static struct inode *new_simple_dir(struct super_block *s, struct btrfs_key *key, struct btrfs_root *root) From 11023d3f5fdf89bba5e1142127701ca6e6014587 Mon Sep 17 00:00:00 2001 From: Filipe Manana <fdmanana@suse.com> Date: Mon, 5 Nov 2018 11:14:05 +0000 Subject: [PATCH 179/443] Btrfs: fix infinite loop on inode eviction after deduplication of eof block If we attempt to deduplicate the last block of a file A into the middle of a file B, and file A's size is not a multiple of the block size, we end rounding the deduplication length to 0 bytes, to avoid the data corruption issue fixed by commit de02b9f6bb65 ("Btrfs: fix data corruption when deduplicating between different files"). However a length of zero will cause the insertion of an extent state with a start value greater (by 1) then the end value, leading to a corrupt extent state that will trigger a warning and cause chaos such as an infinite loop during inode eviction. Example trace: [96049.833585] ------------[ cut here ]------------ [96049.833714] WARNING: CPU: 0 PID: 24448 at fs/btrfs/extent_io.c:436 insert_state+0x101/0x120 [btrfs] [96049.833767] CPU: 0 PID: 24448 Comm: xfs_io Not tainted 4.19.0-rc7-btrfs-next-39 #1 [96049.833768] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.11.2-0-gf9626ccb91-prebuilt.qemu-project.org 04/01/2014 [96049.833780] RIP: 0010:insert_state+0x101/0x120 [btrfs] [96049.833783] RSP: 0018:ffffafd2c3707af0 EFLAGS: 00010282 [96049.833785] RAX: 0000000000000000 RBX: 000000000004dfff RCX: 0000000000000006 [96049.833786] RDX: 0000000000000007 RSI: ffff99045c143230 RDI: ffff99047b2168a0 [96049.833787] RBP: ffff990457851cd0 R08: 0000000000000001 R09: 0000000000000000 [96049.833787] R10: ffffafd2c3707ab8 R11: 0000000000000000 R12: ffff9903b93b12c8 [96049.833788] R13: 000000000004e000 R14: ffffafd2c3707b80 R15: ffffafd2c3707b78 [96049.833790] FS: 00007f5c14e7d700(0000) GS:ffff99047b200000(0000) knlGS:0000000000000000 [96049.833791] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [96049.833792] CR2: 00007f5c146abff8 CR3: 0000000115f4c004 CR4: 00000000003606f0 [96049.833795] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [96049.833796] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 [96049.833796] Call Trace: [96049.833809] __set_extent_bit+0x46c/0x6a0 [btrfs] [96049.833823] lock_extent_bits+0x6b/0x210 [btrfs] [96049.833831] ? _raw_spin_unlock+0x24/0x30 [96049.833841] ? test_range_bit+0xdf/0x130 [btrfs] [96049.833853] lock_extent_range+0x8e/0x150 [btrfs] [96049.833864] btrfs_double_extent_lock+0x78/0xb0 [btrfs] [96049.833875] btrfs_extent_same_range+0x14e/0x550 [btrfs] [96049.833885] ? rcu_read_lock_sched_held+0x3f/0x70 [96049.833890] ? __kmalloc_node+0x2b0/0x2f0 [96049.833899] ? btrfs_dedupe_file_range+0x19a/0x280 [btrfs] [96049.833909] btrfs_dedupe_file_range+0x270/0x280 [btrfs] [96049.833916] vfs_dedupe_file_range_one+0xd9/0xe0 [96049.833919] vfs_dedupe_file_range+0x131/0x1b0 [96049.833924] do_vfs_ioctl+0x272/0x6e0 [96049.833927] ? __fget+0x113/0x200 [96049.833931] ksys_ioctl+0x70/0x80 [96049.833933] __x64_sys_ioctl+0x16/0x20 [96049.833937] do_syscall_64+0x60/0x1b0 [96049.833939] entry_SYSCALL_64_after_hwframe+0x49/0xbe [96049.833941] RIP: 0033:0x7f5c1478ddd7 [96049.833943] RSP: 002b:00007ffe15b196a8 EFLAGS: 00000202 ORIG_RAX: 0000000000000010 [96049.833945] RAX: ffffffffffffffda RBX: 0000000000000000 RCX: 00007f5c1478ddd7 [96049.833946] RDX: 00005625ece322d0 RSI: 00000000c0189436 RDI: 0000000000000004 [96049.833947] RBP: 0000000000000000 R08: 00007f5c14a46f48 R09: 0000000000000040 [96049.833948] R10: 0000000000000541 R11: 0000000000000202 R12: 0000000000000000 [96049.833949] R13: 0000000000000000 R14: 0000000000000004 R15: 00005625ece322d0 [96049.833954] irq event stamp: 6196 [96049.833956] hardirqs last enabled at (6195): [<ffffffff91b00663>] console_unlock+0x503/0x640 [96049.833958] hardirqs last disabled at (6196): [<ffffffff91a037dd>] trace_hardirqs_off_thunk+0x1a/0x1c [96049.833959] softirqs last enabled at (6114): [<ffffffff92600370>] __do_softirq+0x370/0x421 [96049.833964] softirqs last disabled at (6095): [<ffffffff91a8dd4d>] irq_exit+0xcd/0xe0 [96049.833965] ---[ end trace db7b05f01b7fa10c ]--- [96049.935816] R13: 0000000000000000 R14: 00005562e5259240 R15: 00007ffff092b910 [96049.935822] irq event stamp: 6584 [96049.935823] hardirqs last enabled at (6583): [<ffffffff91b00663>] console_unlock+0x503/0x640 [96049.935825] hardirqs last disabled at (6584): [<ffffffff91a037dd>] trace_hardirqs_off_thunk+0x1a/0x1c [96049.935827] softirqs last enabled at (6328): [<ffffffff92600370>] __do_softirq+0x370/0x421 [96049.935828] softirqs last disabled at (6313): [<ffffffff91a8dd4d>] irq_exit+0xcd/0xe0 [96049.935829] ---[ end trace db7b05f01b7fa123 ]--- [96049.935840] ------------[ cut here ]------------ [96049.936065] WARNING: CPU: 1 PID: 24463 at fs/btrfs/extent_io.c:436 insert_state+0x101/0x120 [btrfs] [96049.936107] CPU: 1 PID: 24463 Comm: umount Tainted: G W 4.19.0-rc7-btrfs-next-39 #1 [96049.936108] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.11.2-0-gf9626ccb91-prebuilt.qemu-project.org 04/01/2014 [96049.936117] RIP: 0010:insert_state+0x101/0x120 [btrfs] [96049.936119] RSP: 0018:ffffafd2c3637bc0 EFLAGS: 00010282 [96049.936120] RAX: 0000000000000000 RBX: 000000000004dfff RCX: 0000000000000006 [96049.936121] RDX: 0000000000000007 RSI: ffff990445cf88e0 RDI: ffff99047b2968a0 [96049.936122] RBP: ffff990457851cd0 R08: 0000000000000001 R09: 0000000000000000 [96049.936123] R10: ffffafd2c3637b88 R11: 0000000000000000 R12: ffff9904574301e8 [96049.936124] R13: 000000000004e000 R14: ffffafd2c3637c50 R15: ffffafd2c3637c48 [96049.936125] FS: 00007fe4b87e72c0(0000) GS:ffff99047b280000(0000) knlGS:0000000000000000 [96049.936126] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [96049.936128] CR2: 00005562e52618d8 CR3: 00000001151c8005 CR4: 00000000003606e0 [96049.936129] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [96049.936131] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 [96049.936131] Call Trace: [96049.936141] __set_extent_bit+0x46c/0x6a0 [btrfs] [96049.936154] lock_extent_bits+0x6b/0x210 [btrfs] [96049.936167] btrfs_evict_inode+0x1e1/0x5a0 [btrfs] [96049.936172] evict+0xbf/0x1c0 [96049.936174] dispose_list+0x51/0x80 [96049.936176] evict_inodes+0x193/0x1c0 [96049.936180] generic_shutdown_super+0x3f/0x110 [96049.936182] kill_anon_super+0xe/0x30 [96049.936189] btrfs_kill_super+0x13/0x100 [btrfs] [96049.936191] deactivate_locked_super+0x3a/0x70 [96049.936193] cleanup_mnt+0x3b/0x80 [96049.936195] task_work_run+0x93/0xc0 [96049.936198] exit_to_usermode_loop+0xfa/0x100 [96049.936201] do_syscall_64+0x17f/0x1b0 [96049.936202] entry_SYSCALL_64_after_hwframe+0x49/0xbe [96049.936204] RIP: 0033:0x7fe4b80cfb37 [96049.936206] RSP: 002b:00007ffff092b688 EFLAGS: 00000246 ORIG_RAX: 00000000000000a6 [96049.936207] RAX: 0000000000000000 RBX: 00005562e5259060 RCX: 00007fe4b80cfb37 [96049.936208] RDX: 0000000000000001 RSI: 0000000000000000 RDI: 00005562e525faa0 [96049.936209] RBP: 00005562e525faa0 R08: 00005562e525f770 R09: 0000000000000015 [96049.936210] R10: 00000000000006b4 R11: 0000000000000246 R12: 00007fe4b85d1e64 [96049.936211] R13: 0000000000000000 R14: 00005562e5259240 R15: 00007ffff092b910 [96049.936211] R13: 0000000000000000 R14: 00005562e5259240 R15: 00007ffff092b910 [96049.936216] irq event stamp: 6616 [96049.936219] hardirqs last enabled at (6615): [<ffffffff91b00663>] console_unlock+0x503/0x640 [96049.936219] hardirqs last disabled at (6616): [<ffffffff91a037dd>] trace_hardirqs_off_thunk+0x1a/0x1c [96049.936222] softirqs last enabled at (6328): [<ffffffff92600370>] __do_softirq+0x370/0x421 [96049.936222] softirqs last disabled at (6313): [<ffffffff91a8dd4d>] irq_exit+0xcd/0xe0 [96049.936223] ---[ end trace db7b05f01b7fa124 ]--- The second stack trace, from inode eviction, is repeated forever due to the infinite loop during eviction. This is the same type of problem fixed way back in 2015 by commit 113e8283869b ("Btrfs: fix inode eviction infinite loop after extent_same ioctl") and commit ccccf3d67294 ("Btrfs: fix inode eviction infinite loop after cloning into it"). So fix this by returning immediately if the deduplication range length gets rounded down to 0 bytes, as there is nothing that needs to be done in such case. Example reproducer: $ mkfs.btrfs -f /dev/sdb $ mount /dev/sdb /mnt $ xfs_io -f -c "pwrite -S 0xe6 0 100" /mnt/foo $ xfs_io -f -c "pwrite -S 0xe6 0 1M" /mnt/bar # Unmount the filesystem and mount it again so that we start without any # extent state records when we ask for the deduplication. $ umount /mnt $ mount /dev/sdb /mnt $ xfs_io -c "dedupe /mnt/foo 0 500K 100" /mnt/bar # This unmount triggers the infinite loop. $ umount /mnt A test case for fstests will follow soon. Fixes: de02b9f6bb65 ("Btrfs: fix data corruption when deduplicating between different files") CC: <stable@vger.kernel.org> # 4.19+ Reviewed-by: Nikolay Borisov <nborisov@suse.com> Signed-off-by: Filipe Manana <fdmanana@suse.com> Signed-off-by: David Sterba <dsterba@suse.com> --- fs/btrfs/ioctl.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index a990a90451392..ed3ba55f65ac1 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -3488,6 +3488,8 @@ static int btrfs_extent_same_range(struct inode *src, u64 loff, u64 olen, const u64 sz = BTRFS_I(src)->root->fs_info->sectorsize; len = round_down(i_size_read(src), sz) - loff; + if (len == 0) + return 0; olen = len; } } From ac765f83f1397646c11092a032d4f62c3d478b81 Mon Sep 17 00:00:00 2001 From: Filipe Manana <fdmanana@suse.com> Date: Mon, 5 Nov 2018 11:14:17 +0000 Subject: [PATCH 180/443] Btrfs: fix data corruption due to cloning of eof block We currently allow cloning a range from a file which includes the last block of the file even if the file's size is not aligned to the block size. This is fine and useful when the destination file has the same size, but when it does not and the range ends somewhere in the middle of the destination file, it leads to corruption because the bytes between the EOF and the end of the block have undefined data (when there is support for discard/trimming they have a value of 0x00). Example: $ mkfs.btrfs -f /dev/sdb $ mount /dev/sdb /mnt $ export foo_size=$((256 * 1024 + 100)) $ xfs_io -f -c "pwrite -S 0x3c 0 $foo_size" /mnt/foo $ xfs_io -f -c "pwrite -S 0xb5 0 1M" /mnt/bar $ xfs_io -c "reflink /mnt/foo 0 512K $foo_size" /mnt/bar $ od -A d -t x1 /mnt/bar 0000000 b5 b5 b5 b5 b5 b5 b5 b5 b5 b5 b5 b5 b5 b5 b5 b5 * 0524288 3c 3c 3c 3c 3c 3c 3c 3c 3c 3c 3c 3c 3c 3c 3c 3c * 0786528 3c 3c 3c 3c 00 00 00 00 00 00 00 00 00 00 00 00 0786544 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 * 0790528 b5 b5 b5 b5 b5 b5 b5 b5 b5 b5 b5 b5 b5 b5 b5 b5 * 1048576 The bytes in the range from 786532 (512Kb + 256Kb + 100 bytes) to 790527 (512Kb + 256Kb + 4Kb - 1) got corrupted, having now a value of 0x00 instead of 0xb5. This is similar to the problem we had for deduplication that got recently fixed by commit de02b9f6bb65 ("Btrfs: fix data corruption when deduplicating between different files"). Fix this by not allowing such operations to be performed and return the errno -EINVAL to user space. This is what XFS is doing as well at the VFS level. This change however now makes us return -EINVAL instead of -EOPNOTSUPP for cases where the source range maps to an inline extent and the destination range's end is smaller then the destination file's size, since the detection of inline extents is done during the actual process of dropping file extent items (at __btrfs_drop_extents()). Returning the -EINVAL error is done early on and solely based on the input parameters (offsets and length) and destination file's size. This makes us consistent with XFS and anyone else supporting cloning since this case is now checked at a higher level in the VFS and is where the -EINVAL will be returned from starting with kernel 4.20 (the VFS changed was introduced in 4.20-rc1 by commit 07d19dc9fbe9 ("vfs: avoid problematic remapping requests into partial EOF block"). So this change is more geared towards stable kernels, as it's unlikely the new VFS checks get removed intentionally. A test case for fstests follows soon, as well as an update to filter existing tests that expect -EOPNOTSUPP to accept -EINVAL as well. CC: <stable@vger.kernel.org> # 4.4+ Signed-off-by: Filipe Manana <fdmanana@suse.com> Signed-off-by: David Sterba <dsterba@suse.com> --- fs/btrfs/ioctl.c | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index ed3ba55f65ac1..95f9625dccc4a 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -4279,9 +4279,17 @@ static noinline int btrfs_clone_files(struct file *file, struct file *file_src, goto out_unlock; if (len == 0) olen = len = src->i_size - off; - /* if we extend to eof, continue to block boundary */ - if (off + len == src->i_size) + /* + * If we extend to eof, continue to block boundary if and only if the + * destination end offset matches the destination file's size, otherwise + * we would be corrupting data by placing the eof block into the middle + * of a file. + */ + if (off + len == src->i_size) { + if (!IS_ALIGNED(len, bs) && destoff + len < inode->i_size) + goto out_unlock; len = ALIGN(src->i_size, bs) - off; + } if (len == 0) { ret = 0; From 132bf6723749f7219c399831eeb286dbbb985429 Mon Sep 17 00:00:00 2001 From: Christophe JAILLET <christophe.jaillet@wanadoo.fr> Date: Tue, 6 Nov 2018 07:50:50 -0800 Subject: [PATCH 181/443] xfs: Fix error code in 'xfs_ioc_getbmap()' In this function, once 'buf' has been allocated, we unconditionally return 0. However, 'error' is set to some error codes in several error handling paths. Before commit 232b51948b99 ("xfs: simplify the xfs_getbmap interface") this was not an issue because all error paths were returning directly, but now that some cleanup at the end may be needed, we must propagate the error code. Fixes: 232b51948b99 ("xfs: simplify the xfs_getbmap interface") Signed-off-by: Christophe JAILLET <christophe.jaillet@wanadoo.fr> Reviewed-by: Christoph Hellwig <hch@lst.de> Reviewed-by: Darrick J. Wong <darrick.wong@oracle.com> Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com> --- fs/xfs/xfs_ioctl.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/xfs/xfs_ioctl.c b/fs/xfs/xfs_ioctl.c index 6e2c08f30f602..6ecdbb3af7de5 100644 --- a/fs/xfs/xfs_ioctl.c +++ b/fs/xfs/xfs_ioctl.c @@ -1608,7 +1608,7 @@ xfs_ioc_getbmap( error = 0; out_free_buf: kmem_free(buf); - return 0; + return error; } struct getfsmap_info { From bdec055bb9f262964c4f5cb330dab26646c345c6 Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" <darrick.wong@oracle.com> Date: Tue, 6 Nov 2018 07:50:50 -0800 Subject: [PATCH 182/443] xfs: print buffer offsets when dumping corrupt buffers Use DUMP_PREFIX_OFFSET when printing hex dumps of corrupt buffers because modern Linux now prints a 32-bit hash of our 64-bit pointer when using DUMP_PREFIX_ADDRESS: 00000000b4bb4297: 00 00 00 00 00 00 00 00 3b ee 00 00 00 00 00 00 ........;....... 00000005ec77e26: 00 00 00 00 02 d0 5a 00 00 00 00 00 00 00 00 00 ......Z......... 000000015938018: 21 98 e8 b4 fd de 4c 07 bc ea 3c e5 ae b4 7c 48 !.....L...<...|H This is totally worthless for a sequential dump since we probably only care about tracking the buffer offsets and afaik there's no way to recover the actual pointer from the hashed value. Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com> Reviewed-by: Dave Chinner <dchinner@redhat.com> --- fs/xfs/xfs_message.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/xfs/xfs_message.c b/fs/xfs/xfs_message.c index 576c375ce12a8..6b736ea58d354 100644 --- a/fs/xfs/xfs_message.c +++ b/fs/xfs/xfs_message.c @@ -107,5 +107,5 @@ assfail(char *expr, char *file, int line) void xfs_hex_dump(void *p, int length) { - print_hex_dump(KERN_ALERT, "", DUMP_PREFIX_ADDRESS, 16, 1, p, length, 1); + print_hex_dump(KERN_ALERT, "", DUMP_PREFIX_OFFSET, 16, 1, p, length, 1); } From 837514f7a4ca4aca06aec5caa5ff56d33ef06976 Mon Sep 17 00:00:00 2001 From: Dave Chinner <dchinner@redhat.com> Date: Tue, 6 Nov 2018 07:50:50 -0800 Subject: [PATCH 183/443] xfs: fix overflow in xfs_attr3_leaf_verify generic/070 on 64k block size filesystems is failing with a verifier corruption on writeback or an attribute leaf block: [ 94.973083] XFS (pmem0): Metadata corruption detected at xfs_attr3_leaf_verify+0x246/0x260, xfs_attr3_leaf block 0x811480 [ 94.975623] XFS (pmem0): Unmount and run xfs_repair [ 94.976720] XFS (pmem0): First 128 bytes of corrupted metadata buffer: [ 94.978270] 000000004b2e7b45: 00 00 00 00 00 00 00 00 3b ee 00 00 00 00 00 00 ........;....... [ 94.980268] 000000006b1db90b: 00 00 00 00 00 81 14 80 00 00 00 00 00 00 00 00 ................ [ 94.982251] 00000000433f2407: 22 7b 5c 82 2d 5c 47 4c bb 31 1c 37 fa a9 ce d6 "{\.-\GL.1.7.... [ 94.984157] 0000000010dc7dfb: 00 00 00 00 00 81 04 8a 00 0a 18 e8 dd 94 01 00 ................ [ 94.986215] 00000000d5a19229: 00 a0 dc f4 fe 98 01 68 f0 d8 07 e0 00 00 00 00 .......h........ [ 94.988171] 00000000521df36c: 0c 2d 32 e2 fe 20 01 00 0c 2d 58 65 fe 0c 01 00 .-2.. ...-Xe.... [ 94.990162] 000000008477ae06: 0c 2d 5b 66 fe 8c 01 00 0c 2d 71 35 fe 7c 01 00 .-[f.....-q5.|.. [ 94.992139] 00000000a4a6bca6: 0c 2d 72 37 fc d4 01 00 0c 2d d8 b8 f0 90 01 00 .-r7.....-...... [ 94.994789] XFS (pmem0): xfs_do_force_shutdown(0x8) called from line 1453 of file fs/xfs/xfs_buf.c. Return address = ffffffff815365f3 This is failing this check: end = ichdr.freemap[i].base + ichdr.freemap[i].size; if (end < ichdr.freemap[i].base) >>>>> return __this_address; if (end > mp->m_attr_geo->blksize) return __this_address; And from the buffer output above, the freemap array is: freemap[0].base = 0x00a0 freemap[0].size = 0xdcf4 end = 0xdd94 freemap[1].base = 0xfe98 freemap[1].size = 0x0168 end = 0x10000 freemap[2].base = 0xf0d8 freemap[2].size = 0x07e0 end = 0xf8b8 These all look valid - the block size is 0x10000 and so from the last check in the above verifier fragment we know that the end of freemap[1] is valid. The problem is that end is declared as: uint16_t end; And (uint16_t)0x10000 = 0. So we have a verifier bug here, not a corruption. Fix the verifier to use uint32_t types for the check and hence avoid the overflow. Fixes: https://bugzilla.kernel.org/show_bug.cgi?id=201577 Signed-off-by: Dave Chinner <dchinner@redhat.com> Reviewed-by: Darrick J. Wong <darrick.wong@oracle.com> Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com> --- fs/xfs/libxfs/xfs_attr_leaf.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/fs/xfs/libxfs/xfs_attr_leaf.c b/fs/xfs/libxfs/xfs_attr_leaf.c index 6fc5425b1474a..2652d00842d6b 100644 --- a/fs/xfs/libxfs/xfs_attr_leaf.c +++ b/fs/xfs/libxfs/xfs_attr_leaf.c @@ -243,7 +243,7 @@ xfs_attr3_leaf_verify( struct xfs_mount *mp = bp->b_target->bt_mount; struct xfs_attr_leafblock *leaf = bp->b_addr; struct xfs_attr_leaf_entry *entries; - uint16_t end; + uint32_t end; /* must be 32bit - see below */ int i; xfs_attr3_leaf_hdr_from_disk(mp->m_attr_geo, &ichdr, leaf); @@ -293,6 +293,11 @@ xfs_attr3_leaf_verify( /* * Quickly check the freemap information. Attribute data has to be * aligned to 4-byte boundaries, and likewise for the free space. + * + * Note that for 64k block size filesystems, the freemap entries cannot + * overflow as they are only be16 fields. However, when checking end + * pointer of the freemap, we have to be careful to detect overflows and + * so use uint32_t for those checks. */ for (i = 0; i < XFS_ATTR_LEAF_MAPSIZE; i++) { if (ichdr.freemap[i].base > mp->m_attr_geo->blksize) @@ -303,7 +308,9 @@ xfs_attr3_leaf_verify( return __this_address; if (ichdr.freemap[i].size & 0x3) return __this_address; - end = ichdr.freemap[i].base + ichdr.freemap[i].size; + + /* be care of 16 bit overflows here */ + end = (uint32_t)ichdr.freemap[i].base + ichdr.freemap[i].size; if (end < ichdr.freemap[i].base) return __this_address; if (end > mp->m_attr_geo->blksize) From 7f4cedd882f7cae83177066c2b239ef457ce4a42 Mon Sep 17 00:00:00 2001 From: Giulio Benetti <giulio.benetti@micronovasrl.com> Date: Fri, 5 Oct 2018 23:59:50 +0200 Subject: [PATCH 184/443] drm/sun4i: tcon: fix check of tcon->panel null pointer Since tcon->panel is a pointer returned by of_drm_find_panel() need to check if it is not NULL, hence a valid pointer. IS_ERR() instead checks return error values, not NULL pointers. Substitute "if (!IS_ERR(tcon->panel))" with "if (tcon->panel)". Signed-off-by: Giulio Benetti <giulio.benetti@micronovasrl.com> Signed-off-by: Maxime Ripard <maxime.ripard@bootlin.com> Link: https://patchwork.freedesktop.org/patch/msgid/20181005215951.99003-1-giulio.benetti@micronovasrl.com --- drivers/gpu/drm/sun4i/sun4i_lvds.c | 4 ++-- drivers/gpu/drm/sun4i/sun4i_rgb.c | 4 ++-- drivers/gpu/drm/sun4i/sun4i_tcon.c | 2 +- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/sun4i/sun4i_lvds.c b/drivers/gpu/drm/sun4i/sun4i_lvds.c index af7dcb6da3514..e7eb0d1e17be5 100644 --- a/drivers/gpu/drm/sun4i/sun4i_lvds.c +++ b/drivers/gpu/drm/sun4i/sun4i_lvds.c @@ -75,7 +75,7 @@ static void sun4i_lvds_encoder_enable(struct drm_encoder *encoder) DRM_DEBUG_DRIVER("Enabling LVDS output\n"); - if (!IS_ERR(tcon->panel)) { + if (tcon->panel) { drm_panel_prepare(tcon->panel); drm_panel_enable(tcon->panel); } @@ -88,7 +88,7 @@ static void sun4i_lvds_encoder_disable(struct drm_encoder *encoder) DRM_DEBUG_DRIVER("Disabling LVDS output\n"); - if (!IS_ERR(tcon->panel)) { + if (tcon->panel) { drm_panel_disable(tcon->panel); drm_panel_unprepare(tcon->panel); } diff --git a/drivers/gpu/drm/sun4i/sun4i_rgb.c b/drivers/gpu/drm/sun4i/sun4i_rgb.c index bf068da6b12e1..f4a22689eb54c 100644 --- a/drivers/gpu/drm/sun4i/sun4i_rgb.c +++ b/drivers/gpu/drm/sun4i/sun4i_rgb.c @@ -135,7 +135,7 @@ static void sun4i_rgb_encoder_enable(struct drm_encoder *encoder) DRM_DEBUG_DRIVER("Enabling RGB output\n"); - if (!IS_ERR(tcon->panel)) { + if (tcon->panel) { drm_panel_prepare(tcon->panel); drm_panel_enable(tcon->panel); } @@ -148,7 +148,7 @@ static void sun4i_rgb_encoder_disable(struct drm_encoder *encoder) DRM_DEBUG_DRIVER("Disabling RGB output\n"); - if (!IS_ERR(tcon->panel)) { + if (tcon->panel) { drm_panel_disable(tcon->panel); drm_panel_unprepare(tcon->panel); } diff --git a/drivers/gpu/drm/sun4i/sun4i_tcon.c b/drivers/gpu/drm/sun4i/sun4i_tcon.c index c78cd35a1294b..e4b3bd0307ef9 100644 --- a/drivers/gpu/drm/sun4i/sun4i_tcon.c +++ b/drivers/gpu/drm/sun4i/sun4i_tcon.c @@ -555,7 +555,7 @@ static void sun4i_tcon0_mode_set_rgb(struct sun4i_tcon *tcon, * Following code is a way to avoid quirks all around TCON * and DOTCLOCK drivers. */ - if (!IS_ERR(tcon->panel)) { + if (tcon->panel) { struct drm_panel *panel = tcon->panel; struct drm_connector *connector = panel->connector; struct drm_display_info display_info = connector->display_info; From a8939766c75c06b5a0ab691ecbba9347e4e520cf Mon Sep 17 00:00:00 2001 From: Giulio Benetti <giulio.benetti@micronovasrl.com> Date: Fri, 5 Oct 2018 23:59:51 +0200 Subject: [PATCH 185/443] drm/sun4i: tcon: prevent tcon->panel dereference if NULL If tcon->panel pointer is NULL, trying to dereference from it (i.e. tcon->panel->connector) will cause a null pointer dereference. Add tcon->panel null pointer check before calling sun4i_tcon0_mode_set_dithering(). Signed-off-by: Giulio Benetti <giulio.benetti@micronovasrl.com> Fixes: f11adcecbd5f ("drm/sun4i: tcon: Add dithering support for RGB565/RGB666 LCD panels") Reviewed-by: Chen-Yu Tsai <wens@csie.org> Signed-off-by: Maxime Ripard <maxime.ripard@bootlin.com> Link: https://patchwork.freedesktop.org/patch/msgid/20181005215951.99003-2-giulio.benetti@micronovasrl.com --- drivers/gpu/drm/sun4i/sun4i_tcon.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/sun4i/sun4i_tcon.c b/drivers/gpu/drm/sun4i/sun4i_tcon.c index e4b3bd0307ef9..f949287d926cd 100644 --- a/drivers/gpu/drm/sun4i/sun4i_tcon.c +++ b/drivers/gpu/drm/sun4i/sun4i_tcon.c @@ -491,7 +491,8 @@ static void sun4i_tcon0_mode_set_rgb(struct sun4i_tcon *tcon, sun4i_tcon0_mode_set_common(tcon, mode); /* Set dithering if needed */ - sun4i_tcon0_mode_set_dithering(tcon, tcon->panel->connector); + if (tcon->panel) + sun4i_tcon0_mode_set_dithering(tcon, tcon->panel->connector); /* Adjust clock delay */ clk_delay = sun4i_tcon_get_clk_delay(mode, 0); From df5e31c204b34e8d9e5ec33f5b28e960c4f25e14 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= <ville.syrjala@linux.intel.com> Date: Thu, 25 Oct 2018 16:05:36 +0300 Subject: [PATCH 186/443] drm/i915: Fix ilk+ watermarks when disabling pipes MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We're no longer programming any watermarks when we're disabling a pipe. That means ilk_wm_merge() & co. will keep considering the any pipe that is getting disabled as still enabled. Thus we either get no LP1+ watermakrs (ilk-ivb), or we get suboptimal ones (hsw-bdw). This seems to have been broken by commit b6b178a77210 ("drm/i915: Calculate ironlake intermediate watermarks correctly, v2."). Before that we apparently had some difference between the intermediate and optimal watermarks and so we would program the optiomal ones. Now intermediate and optimal are identical for disabled pipes and so we don't program either. Fix this by programming the intermediate watermarks even for disabled pipes. We were already doing that for skl+. We'll leave out gmch platforms for now since those do the merging in a different manner and should work as is. We'll want to unify this eventually, but play it safe for now and just put in a FIXME. Cc: stable@vger.kernel.org Cc: Matt Roper <matthew.d.roper@intel.com> Cc: Maarten Lankhorst <maarten.lankhorst@linux.intel.com> Fixes: b6b178a77210 ("drm/i915: Calculate ironlake intermediate watermarks correctly, v2.") Signed-off-by: Ville Syrjälä <ville.syrjala@linux.intel.com> Link: https://patchwork.freedesktop.org/patch/msgid/20181025130536.29024-1-ville.syrjala@linux.intel.com Reviewed-by: Maarten Lankhorst <maarten.lankhorst@linux.intel.com> #irc (cherry picked from commit a748faea3bfd7fd1d1485bc1c426c7d460cc6503) Signed-off-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com> --- drivers/gpu/drm/i915/intel_display.c | 17 ++++++----------- 1 file changed, 6 insertions(+), 11 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index b8dfdbc9ca1f6..23d8008a93bb6 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -12768,17 +12768,12 @@ static void intel_atomic_commit_tail(struct drm_atomic_state *state) intel_check_cpu_fifo_underruns(dev_priv); intel_check_pch_fifo_underruns(dev_priv); - if (!new_crtc_state->active) { - /* - * Make sure we don't call initial_watermarks - * for ILK-style watermark updates. - * - * No clue what this is supposed to achieve. - */ - if (INTEL_GEN(dev_priv) >= 9) - dev_priv->display.initial_watermarks(intel_state, - to_intel_crtc_state(new_crtc_state)); - } + /* FIXME unify this for all platforms */ + if (!new_crtc_state->active && + !HAS_GMCH_DISPLAY(dev_priv) && + dev_priv->display.initial_watermarks) + dev_priv->display.initial_watermarks(intel_state, + to_intel_crtc_state(new_crtc_state)); } } From f98e8a572bddbf27032114127d2fcc78fa5e6a9d Mon Sep 17 00:00:00 2001 From: Ricardo Ribalda Delgado <ricardo.ribalda@gmail.com> Date: Thu, 1 Nov 2018 14:15:49 +0100 Subject: [PATCH 187/443] clk: fixed-factor: fix of_node_get-put imbalance When the fixed factor clock is created by devicetree, of_clk_add_provider is called. Add a call to of_clk_del_provider in the remove function to balance it out. Reported-by: Alan Tull <atull@kernel.org> Fixes: 971451b3b15d ("clk: fixed-factor: Convert into a module platform driver") Signed-off-by: Ricardo Ribalda Delgado <ricardo.ribalda@gmail.com> Signed-off-by: Stephen Boyd <sboyd@kernel.org> --- drivers/clk/clk-fixed-factor.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/clk/clk-fixed-factor.c b/drivers/clk/clk-fixed-factor.c index ef0ca9414f371..ff83e899df71f 100644 --- a/drivers/clk/clk-fixed-factor.c +++ b/drivers/clk/clk-fixed-factor.c @@ -210,6 +210,7 @@ static int of_fixed_factor_clk_remove(struct platform_device *pdev) { struct clk *clk = platform_get_drvdata(pdev); + of_clk_del_provider(pdev->dev.of_node); clk_unregister_fixed_factor(clk); return 0; From 98ee3fc7ef8395f8b7a379e6608aee91efc66d48 Mon Sep 17 00:00:00 2001 From: Boris Brezillon <boris.brezillon@bootlin.com> Date: Tue, 6 Nov 2018 17:25:37 +0100 Subject: [PATCH 188/443] mtd: nand: Fix nanddev_pos_next_page() kernel-doc header Function name is wrong in the kernel-doc header. Fixes: 9c3736a3de21 ("mtd: nand: Add core infrastructure to deal with NAND devices") Signed-off-by: Boris Brezillon <boris.brezillon@bootlin.com> Reviewed-by: Miquel Raynal <miquel.raynal@bootlin.com> --- include/linux/mtd/nand.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/linux/mtd/nand.h b/include/linux/mtd/nand.h index 78b86dea2f294..7f53ece2c039a 100644 --- a/include/linux/mtd/nand.h +++ b/include/linux/mtd/nand.h @@ -568,7 +568,7 @@ static inline void nanddev_pos_next_eraseblock(struct nand_device *nand, } /** - * nanddev_pos_next_eraseblock() - Move a position to the next page + * nanddev_pos_next_page() - Move a position to the next page * @nand: NAND device * @pos: the position to update * From 313a06e636808387822af24c507cba92703568b1 Mon Sep 17 00:00:00 2001 From: Jeremy Linton <jeremy.linton@arm.com> Date: Mon, 5 Nov 2018 18:14:41 -0600 Subject: [PATCH 189/443] lib/raid6: Fix arm64 test build The lib/raid6/test fails to build the neon objects on arm64 because the correct machine type is 'aarch64'. Once this is correctly enabled, the neon recovery objects need to be added to the build. Reviewed-by: Ard Biesheuvel <ard.biesheuvel@linaro.org> Signed-off-by: Jeremy Linton <jeremy.linton@arm.com> Signed-off-by: Catalin Marinas <catalin.marinas@arm.com> --- lib/raid6/test/Makefile | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/lib/raid6/test/Makefile b/lib/raid6/test/Makefile index 5d73f5cb4d8a7..79777645cac9c 100644 --- a/lib/raid6/test/Makefile +++ b/lib/raid6/test/Makefile @@ -27,7 +27,7 @@ ifeq ($(ARCH),arm) CFLAGS += -I../../../arch/arm/include -mfpu=neon HAS_NEON = yes endif -ifeq ($(ARCH),arm64) +ifeq ($(ARCH),aarch64) CFLAGS += -I../../../arch/arm64/include HAS_NEON = yes endif @@ -41,7 +41,7 @@ ifeq ($(IS_X86),yes) gcc -c -x assembler - >&/dev/null && \ rm ./-.o && echo -DCONFIG_AS_AVX512=1) else ifeq ($(HAS_NEON),yes) - OBJS += neon.o neon1.o neon2.o neon4.o neon8.o + OBJS += neon.o neon1.o neon2.o neon4.o neon8.o recov_neon.o recov_neon_inner.o CFLAGS += -DCONFIG_KERNEL_MODE_NEON=1 else HAS_ALTIVEC := $(shell printf '\#include <altivec.h>\nvector int a;\n' |\ From 5d96c9342c23ee1d084802dcf064caa67ecaa45b Mon Sep 17 00:00:00 2001 From: Vishal Verma <vishal.l.verma@intel.com> Date: Thu, 25 Oct 2018 18:37:28 -0600 Subject: [PATCH 190/443] acpi/nfit, x86/mce: Handle only uncorrectable machine checks The MCE handler for nfit devices is called for memory errors on a Non-Volatile DIMM and adds the error location to a 'badblocks' list. This list is used by the various NVDIMM drivers to avoid consuming known poison locations during IO. The MCE handler gets called for both corrected and uncorrectable errors. Until now, both kinds of errors have been added to the badblocks list. However, corrected memory errors indicate that the problem has already been fixed by hardware, and the resulting interrupt is merely a notification to Linux. As far as future accesses to that location are concerned, it is perfectly fine to use, and thus doesn't need to be included in the above badblocks list. Add a check in the nfit MCE handler to filter out corrected mce events, and only process uncorrectable errors. Fixes: 6839a6d96f4e ("nfit: do an ARS scrub on hitting a latent media error") Reported-by: Omar Avelar <omar.avelar@intel.com> Signed-off-by: Vishal Verma <vishal.l.verma@intel.com> Signed-off-by: Borislav Petkov <bp@suse.de> CC: Arnd Bergmann <arnd@arndb.de> CC: Dan Williams <dan.j.williams@intel.com> CC: Dave Jiang <dave.jiang@intel.com> CC: elliott@hpe.com CC: "H. Peter Anvin" <hpa@zytor.com> CC: Ingo Molnar <mingo@redhat.com> CC: Len Brown <lenb@kernel.org> CC: linux-acpi@vger.kernel.org CC: linux-edac <linux-edac@vger.kernel.org> CC: linux-nvdimm@lists.01.org CC: Qiuxu Zhuo <qiuxu.zhuo@intel.com> CC: "Rafael J. Wysocki" <rjw@rjwysocki.net> CC: Ross Zwisler <zwisler@kernel.org> CC: stable <stable@vger.kernel.org> CC: Thomas Gleixner <tglx@linutronix.de> CC: Tony Luck <tony.luck@intel.com> CC: x86-ml <x86@kernel.org> CC: Yazen Ghannam <yazen.ghannam@amd.com> Link: http://lkml.kernel.org/r/20181026003729.8420-1-vishal.l.verma@intel.com --- arch/x86/include/asm/mce.h | 1 + arch/x86/kernel/cpu/mcheck/mce.c | 3 ++- drivers/acpi/nfit/mce.c | 4 ++-- 3 files changed, 5 insertions(+), 3 deletions(-) diff --git a/arch/x86/include/asm/mce.h b/arch/x86/include/asm/mce.h index 4da9b1c58d287..dbd9fe2f6163d 100644 --- a/arch/x86/include/asm/mce.h +++ b/arch/x86/include/asm/mce.h @@ -221,6 +221,7 @@ static inline void mce_hygon_feature_init(struct cpuinfo_x86 *c) { return mce_am int mce_available(struct cpuinfo_x86 *c); bool mce_is_memory_error(struct mce *m); +bool mce_is_correctable(struct mce *m); DECLARE_PER_CPU(unsigned, mce_exception_count); DECLARE_PER_CPU(unsigned, mce_poll_count); diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c index 8c66d2fc8f81d..77527b8ea982a 100644 --- a/arch/x86/kernel/cpu/mcheck/mce.c +++ b/arch/x86/kernel/cpu/mcheck/mce.c @@ -534,7 +534,7 @@ bool mce_is_memory_error(struct mce *m) } EXPORT_SYMBOL_GPL(mce_is_memory_error); -static bool mce_is_correctable(struct mce *m) +bool mce_is_correctable(struct mce *m) { if (m->cpuvendor == X86_VENDOR_AMD && m->status & MCI_STATUS_DEFERRED) return false; @@ -547,6 +547,7 @@ static bool mce_is_correctable(struct mce *m) return true; } +EXPORT_SYMBOL_GPL(mce_is_correctable); static bool cec_add_mce(struct mce *m) { diff --git a/drivers/acpi/nfit/mce.c b/drivers/acpi/nfit/mce.c index e9626bf6ca296..7a51707f87e95 100644 --- a/drivers/acpi/nfit/mce.c +++ b/drivers/acpi/nfit/mce.c @@ -25,8 +25,8 @@ static int nfit_handle_mce(struct notifier_block *nb, unsigned long val, struct acpi_nfit_desc *acpi_desc; struct nfit_spa *nfit_spa; - /* We only care about memory errors */ - if (!mce_is_memory_error(mce)) + /* We only care about uncorrectable memory errors */ + if (!mce_is_memory_error(mce) || mce_is_correctable(mce)) return NOTIFY_DONE; /* From e8a308e5f47e545e0d41d0686c00f5f5217c5f61 Mon Sep 17 00:00:00 2001 From: Vishal Verma <vishal.l.verma@intel.com> Date: Thu, 25 Oct 2018 18:37:29 -0600 Subject: [PATCH 191/443] acpi/nfit, x86/mce: Validate a MCE's address before using it The NFIT machine check handler uses the physical address from the mce structure, and compares it against information in the ACPI NFIT table to determine whether that location lies on an NVDIMM. The mce->addr field however may not always be valid, and this is indicated by the MCI_STATUS_ADDRV bit in the status field. Export mce_usable_address() which already performs validation for the address, and use it in the NFIT handler. Fixes: 6839a6d96f4e ("nfit: do an ARS scrub on hitting a latent media error") Reported-by: Robert Elliott <elliott@hpe.com> Signed-off-by: Vishal Verma <vishal.l.verma@intel.com> Signed-off-by: Borislav Petkov <bp@suse.de> CC: Arnd Bergmann <arnd@arndb.de> Cc: Dan Williams <dan.j.williams@intel.com> CC: Dave Jiang <dave.jiang@intel.com> CC: elliott@hpe.com CC: "H. Peter Anvin" <hpa@zytor.com> CC: Ingo Molnar <mingo@redhat.com> CC: Len Brown <lenb@kernel.org> CC: linux-acpi@vger.kernel.org CC: linux-edac <linux-edac@vger.kernel.org> CC: linux-nvdimm@lists.01.org CC: Qiuxu Zhuo <qiuxu.zhuo@intel.com> CC: "Rafael J. Wysocki" <rjw@rjwysocki.net> CC: Ross Zwisler <zwisler@kernel.org> CC: stable <stable@vger.kernel.org> CC: Thomas Gleixner <tglx@linutronix.de> CC: Tony Luck <tony.luck@intel.com> CC: x86-ml <x86@kernel.org> CC: Yazen Ghannam <yazen.ghannam@amd.com> Link: http://lkml.kernel.org/r/20181026003729.8420-2-vishal.l.verma@intel.com --- arch/x86/include/asm/mce.h | 1 + arch/x86/kernel/cpu/mcheck/mce.c | 3 ++- drivers/acpi/nfit/mce.c | 4 ++++ 3 files changed, 7 insertions(+), 1 deletion(-) diff --git a/arch/x86/include/asm/mce.h b/arch/x86/include/asm/mce.h index dbd9fe2f6163d..c1a812bd5a27d 100644 --- a/arch/x86/include/asm/mce.h +++ b/arch/x86/include/asm/mce.h @@ -222,6 +222,7 @@ static inline void mce_hygon_feature_init(struct cpuinfo_x86 *c) { return mce_am int mce_available(struct cpuinfo_x86 *c); bool mce_is_memory_error(struct mce *m); bool mce_is_correctable(struct mce *m); +int mce_usable_address(struct mce *m); DECLARE_PER_CPU(unsigned, mce_exception_count); DECLARE_PER_CPU(unsigned, mce_poll_count); diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c index 77527b8ea982a..36d2696c9563e 100644 --- a/arch/x86/kernel/cpu/mcheck/mce.c +++ b/arch/x86/kernel/cpu/mcheck/mce.c @@ -485,7 +485,7 @@ static void mce_report_event(struct pt_regs *regs) * be somewhat complicated (e.g. segment offset would require an instruction * parser). So only support physical addresses up to page granuality for now. */ -static int mce_usable_address(struct mce *m) +int mce_usable_address(struct mce *m) { if (!(m->status & MCI_STATUS_ADDRV)) return 0; @@ -505,6 +505,7 @@ static int mce_usable_address(struct mce *m) return 1; } +EXPORT_SYMBOL_GPL(mce_usable_address); bool mce_is_memory_error(struct mce *m) { diff --git a/drivers/acpi/nfit/mce.c b/drivers/acpi/nfit/mce.c index 7a51707f87e95..d6c1b10f6c254 100644 --- a/drivers/acpi/nfit/mce.c +++ b/drivers/acpi/nfit/mce.c @@ -29,6 +29,10 @@ static int nfit_handle_mce(struct notifier_block *nb, unsigned long val, if (!mce_is_memory_error(mce) || mce_is_correctable(mce)) return NOTIFY_DONE; + /* Verify the address reported in the MCE is valid. */ + if (!mce_usable_address(mce)) + return NOTIFY_DONE; + /* * mce->addr contains the physical addr accessed that caused the * machine check. We need to walk through the list of NFITs, and see From 042cb56478152b31c50bea8a784fc826891eb38e Mon Sep 17 00:00:00 2001 From: Tao Ren <taoren@fb.com> Date: Mon, 5 Nov 2018 14:35:40 -0800 Subject: [PATCH 192/443] net: phy: Allow BCM54616S PHY to setup internal TX/RX clock delay This patch allows users to enable/disable internal TX and/or RX clock delay for BCM54616S PHYs so as to satisfy RGMII timing specifications. On a particular platform, whether TX and/or RX clock delay is required depends on how PHY connected to the MAC IP. This requirement can be specified through "phy-mode" property in the platform device tree. The patch is inspired by commit 733336262b28 ("net: phy: Allow BCM5481x PHYs to setup internal TX/RX clock delay"). Signed-off-by: Tao Ren <taoren@fb.com> Reviewed-by: Florian Fainelli <f.fainelli@gmail.com> Signed-off-by: David S. Miller <davem@davemloft.net> --- drivers/net/phy/broadcom.c | 18 ++++++++++++++++-- 1 file changed, 16 insertions(+), 2 deletions(-) diff --git a/drivers/net/phy/broadcom.c b/drivers/net/phy/broadcom.c index e86ea105c8022..7045370104537 100644 --- a/drivers/net/phy/broadcom.c +++ b/drivers/net/phy/broadcom.c @@ -92,7 +92,7 @@ static int bcm54612e_config_init(struct phy_device *phydev) return 0; } -static int bcm5481x_config(struct phy_device *phydev) +static int bcm54xx_config_clock_delay(struct phy_device *phydev) { int rc, val; @@ -429,7 +429,7 @@ static int bcm5481_config_aneg(struct phy_device *phydev) ret = genphy_config_aneg(phydev); /* Then we can set up the delay. */ - bcm5481x_config(phydev); + bcm54xx_config_clock_delay(phydev); if (of_property_read_bool(np, "enet-phy-lane-swap")) { /* Lane Swap - Undocumented register...magic! */ @@ -442,6 +442,19 @@ static int bcm5481_config_aneg(struct phy_device *phydev) return ret; } +static int bcm54616s_config_aneg(struct phy_device *phydev) +{ + int ret; + + /* Aneg firsly. */ + ret = genphy_config_aneg(phydev); + + /* Then we can set up the delay. */ + bcm54xx_config_clock_delay(phydev); + + return ret; +} + static int brcm_phy_setbits(struct phy_device *phydev, int reg, int set) { int val; @@ -636,6 +649,7 @@ static struct phy_driver broadcom_drivers[] = { .features = PHY_GBIT_FEATURES, .flags = PHY_HAS_INTERRUPT, .config_init = bcm54xx_config_init, + .config_aneg = bcm54616s_config_aneg, .ack_interrupt = bcm_phy_ack_intr, .config_intr = bcm_phy_config_intr, }, { From d52888aa2753e3063a9d3a0c9f72f94aa9809c15 Mon Sep 17 00:00:00 2001 From: "Kirill A. Shutemov" <kirill.shutemov@linux.intel.com> Date: Fri, 26 Oct 2018 15:28:54 +0300 Subject: [PATCH 193/443] x86/mm: Move LDT remap out of KASLR region on 5-level paging On 5-level paging the LDT remap area is placed in the middle of the KASLR randomization region and it can overlap with the direct mapping, the vmalloc or the vmap area. The LDT mapping is per mm, so it cannot be moved into the P4D page table next to the CPU_ENTRY_AREA without complicating PGD table allocation for 5-level paging. The 4 PGD slot gap just before the direct mapping is reserved for hypervisors, so it cannot be used. Move the direct mapping one slot deeper and use the resulting gap for the LDT remap area. The resulting layout is the same for 4 and 5 level paging. [ tglx: Massaged changelog ] Fixes: f55f0501cbf6 ("x86/pti: Put the LDT in its own PGD if PTI is on") Signed-off-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com> Signed-off-by: Thomas Gleixner <tglx@linutronix.de> Reviewed-by: Andy Lutomirski <luto@kernel.org> Cc: bp@alien8.de Cc: hpa@zytor.com Cc: dave.hansen@linux.intel.com Cc: peterz@infradead.org Cc: boris.ostrovsky@oracle.com Cc: jgross@suse.com Cc: bhe@redhat.com Cc: willy@infradead.org Cc: linux-mm@kvack.org Cc: stable@vger.kernel.org Link: https://lkml.kernel.org/r/20181026122856.66224-2-kirill.shutemov@linux.intel.com --- Documentation/x86/x86_64/mm.txt | 34 +++++++++++++------------ arch/x86/include/asm/page_64_types.h | 12 +++++---- arch/x86/include/asm/pgtable_64_types.h | 4 +-- arch/x86/xen/mmu_pv.c | 6 ++--- 4 files changed, 29 insertions(+), 27 deletions(-) diff --git a/Documentation/x86/x86_64/mm.txt b/Documentation/x86/x86_64/mm.txt index 73aaaa3da4369..804f9426ed17b 100644 --- a/Documentation/x86/x86_64/mm.txt +++ b/Documentation/x86/x86_64/mm.txt @@ -34,23 +34,24 @@ __________________|____________|__________________|_________|___________________ ____________________________________________________________|___________________________________________________________ | | | | ffff800000000000 | -128 TB | ffff87ffffffffff | 8 TB | ... guard hole, also reserved for hypervisor - ffff880000000000 | -120 TB | ffffc7ffffffffff | 64 TB | direct mapping of all physical memory (page_offset_base) - ffffc80000000000 | -56 TB | ffffc8ffffffffff | 1 TB | ... unused hole + ffff880000000000 | -120 TB | ffff887fffffffff | 0.5 TB | LDT remap for PTI + ffff888000000000 | -119.5 TB | ffffc87fffffffff | 64 TB | direct mapping of all physical memory (page_offset_base) + ffffc88000000000 | -55.5 TB | ffffc8ffffffffff | 0.5 TB | ... unused hole ffffc90000000000 | -55 TB | ffffe8ffffffffff | 32 TB | vmalloc/ioremap space (vmalloc_base) ffffe90000000000 | -23 TB | ffffe9ffffffffff | 1 TB | ... unused hole ffffea0000000000 | -22 TB | ffffeaffffffffff | 1 TB | virtual memory map (vmemmap_base) ffffeb0000000000 | -21 TB | ffffebffffffffff | 1 TB | ... unused hole ffffec0000000000 | -20 TB | fffffbffffffffff | 16 TB | KASAN shadow memory - fffffc0000000000 | -4 TB | fffffdffffffffff | 2 TB | ... unused hole - | | | | vaddr_end for KASLR - fffffe0000000000 | -2 TB | fffffe7fffffffff | 0.5 TB | cpu_entry_area mapping - fffffe8000000000 | -1.5 TB | fffffeffffffffff | 0.5 TB | LDT remap for PTI - ffffff0000000000 | -1 TB | ffffff7fffffffff | 0.5 TB | %esp fixup stacks __________________|____________|__________________|_________|____________________________________________________________ | - | Identical layout to the 47-bit one from here on: + | Identical layout to the 56-bit one from here on: ____________________________________________________________|____________________________________________________________ | | | | + fffffc0000000000 | -4 TB | fffffdffffffffff | 2 TB | ... unused hole + | | | | vaddr_end for KASLR + fffffe0000000000 | -2 TB | fffffe7fffffffff | 0.5 TB | cpu_entry_area mapping + fffffe8000000000 | -1.5 TB | fffffeffffffffff | 0.5 TB | ... unused hole + ffffff0000000000 | -1 TB | ffffff7fffffffff | 0.5 TB | %esp fixup stacks ffffff8000000000 | -512 GB | ffffffeeffffffff | 444 GB | ... unused hole ffffffef00000000 | -68 GB | fffffffeffffffff | 64 GB | EFI region mapping space ffffffff00000000 | -4 GB | ffffffff7fffffff | 2 GB | ... unused hole @@ -83,7 +84,7 @@ Notes: __________________|____________|__________________|_________|___________________________________________________________ | | | | 0000800000000000 | +64 PB | ffff7fffffffffff | ~16K PB | ... huge, still almost 64 bits wide hole of non-canonical - | | | | virtual memory addresses up to the -128 TB + | | | | virtual memory addresses up to the -64 PB | | | | starting offset of kernel mappings. __________________|____________|__________________|_________|___________________________________________________________ | @@ -91,23 +92,24 @@ __________________|____________|__________________|_________|___________________ ____________________________________________________________|___________________________________________________________ | | | | ff00000000000000 | -64 PB | ff0fffffffffffff | 4 PB | ... guard hole, also reserved for hypervisor - ff10000000000000 | -60 PB | ff8fffffffffffff | 32 PB | direct mapping of all physical memory (page_offset_base) - ff90000000000000 | -28 PB | ff9fffffffffffff | 4 PB | LDT remap for PTI + ff10000000000000 | -60 PB | ff10ffffffffffff | 0.25 PB | LDT remap for PTI + ff11000000000000 | -59.75 PB | ff90ffffffffffff | 32 PB | direct mapping of all physical memory (page_offset_base) + ff91000000000000 | -27.75 PB | ff9fffffffffffff | 3.75 PB | ... unused hole ffa0000000000000 | -24 PB | ffd1ffffffffffff | 12.5 PB | vmalloc/ioremap space (vmalloc_base) ffd2000000000000 | -11.5 PB | ffd3ffffffffffff | 0.5 PB | ... unused hole ffd4000000000000 | -11 PB | ffd5ffffffffffff | 0.5 PB | virtual memory map (vmemmap_base) ffd6000000000000 | -10.5 PB | ffdeffffffffffff | 2.25 PB | ... unused hole ffdf000000000000 | -8.25 PB | fffffdffffffffff | ~8 PB | KASAN shadow memory - fffffc0000000000 | -4 TB | fffffdffffffffff | 2 TB | ... unused hole - | | | | vaddr_end for KASLR - fffffe0000000000 | -2 TB | fffffe7fffffffff | 0.5 TB | cpu_entry_area mapping - fffffe8000000000 | -1.5 TB | fffffeffffffffff | 0.5 TB | ... unused hole - ffffff0000000000 | -1 TB | ffffff7fffffffff | 0.5 TB | %esp fixup stacks __________________|____________|__________________|_________|____________________________________________________________ | | Identical layout to the 47-bit one from here on: ____________________________________________________________|____________________________________________________________ | | | | + fffffc0000000000 | -4 TB | fffffdffffffffff | 2 TB | ... unused hole + | | | | vaddr_end for KASLR + fffffe0000000000 | -2 TB | fffffe7fffffffff | 0.5 TB | cpu_entry_area mapping + fffffe8000000000 | -1.5 TB | fffffeffffffffff | 0.5 TB | ... unused hole + ffffff0000000000 | -1 TB | ffffff7fffffffff | 0.5 TB | %esp fixup stacks ffffff8000000000 | -512 GB | ffffffeeffffffff | 444 GB | ... unused hole ffffffef00000000 | -68 GB | fffffffeffffffff | 64 GB | EFI region mapping space ffffffff00000000 | -4 GB | ffffffff7fffffff | 2 GB | ... unused hole diff --git a/arch/x86/include/asm/page_64_types.h b/arch/x86/include/asm/page_64_types.h index cd0cf1c568b4c..8f657286d599a 100644 --- a/arch/x86/include/asm/page_64_types.h +++ b/arch/x86/include/asm/page_64_types.h @@ -33,12 +33,14 @@ /* * Set __PAGE_OFFSET to the most negative possible address + - * PGDIR_SIZE*16 (pgd slot 272). The gap is to allow a space for a - * hypervisor to fit. Choosing 16 slots here is arbitrary, but it's - * what Xen requires. + * PGDIR_SIZE*17 (pgd slot 273). + * + * The gap is to allow a space for LDT remap for PTI (1 pgd slot) and space for + * a hypervisor (16 slots). Choosing 16 slots for a hypervisor is arbitrary, + * but it's what Xen requires. */ -#define __PAGE_OFFSET_BASE_L5 _AC(0xff10000000000000, UL) -#define __PAGE_OFFSET_BASE_L4 _AC(0xffff880000000000, UL) +#define __PAGE_OFFSET_BASE_L5 _AC(0xff11000000000000, UL) +#define __PAGE_OFFSET_BASE_L4 _AC(0xffff888000000000, UL) #ifdef CONFIG_DYNAMIC_MEMORY_LAYOUT #define __PAGE_OFFSET page_offset_base diff --git a/arch/x86/include/asm/pgtable_64_types.h b/arch/x86/include/asm/pgtable_64_types.h index 04edd2d58211a..84bd9bdc1987f 100644 --- a/arch/x86/include/asm/pgtable_64_types.h +++ b/arch/x86/include/asm/pgtable_64_types.h @@ -111,9 +111,7 @@ extern unsigned int ptrs_per_p4d; */ #define MAXMEM (1UL << MAX_PHYSMEM_BITS) -#define LDT_PGD_ENTRY_L4 -3UL -#define LDT_PGD_ENTRY_L5 -112UL -#define LDT_PGD_ENTRY (pgtable_l5_enabled() ? LDT_PGD_ENTRY_L5 : LDT_PGD_ENTRY_L4) +#define LDT_PGD_ENTRY -240UL #define LDT_BASE_ADDR (LDT_PGD_ENTRY << PGDIR_SHIFT) #define LDT_END_ADDR (LDT_BASE_ADDR + PGDIR_SIZE) diff --git a/arch/x86/xen/mmu_pv.c b/arch/x86/xen/mmu_pv.c index 0d7b3ae4960bb..a5d7ed1253370 100644 --- a/arch/x86/xen/mmu_pv.c +++ b/arch/x86/xen/mmu_pv.c @@ -1905,7 +1905,7 @@ void __init xen_setup_kernel_pagetable(pgd_t *pgd, unsigned long max_pfn) init_top_pgt[0] = __pgd(0); /* Pre-constructed entries are in pfn, so convert to mfn */ - /* L4[272] -> level3_ident_pgt */ + /* L4[273] -> level3_ident_pgt */ /* L4[511] -> level3_kernel_pgt */ convert_pfn_mfn(init_top_pgt); @@ -1925,8 +1925,8 @@ void __init xen_setup_kernel_pagetable(pgd_t *pgd, unsigned long max_pfn) addr[0] = (unsigned long)pgd; addr[1] = (unsigned long)l3; addr[2] = (unsigned long)l2; - /* Graft it onto L4[272][0]. Note that we creating an aliasing problem: - * Both L4[272][0] and L4[511][510] have entries that point to the same + /* Graft it onto L4[273][0]. Note that we creating an aliasing problem: + * Both L4[273][0] and L4[511][510] have entries that point to the same * L2 (PMD) tables. Meaning that if you modify it in __va space * it will be also modified in the __ka space! (But if you just * modify the PMD table to point to other PTE's or none, then you From a0e6e0831c516860fc7f9be1db6c081fe902ebcf Mon Sep 17 00:00:00 2001 From: "Kirill A. Shutemov" <kirill.shutemov@linux.intel.com> Date: Fri, 26 Oct 2018 15:28:55 +0300 Subject: [PATCH 194/443] x86/ldt: Unmap PTEs for the slot before freeing LDT pages modify_ldt(2) leaves the old LDT mapped after switching over to the new one. The old LDT gets freed and the pages can be re-used. Leaving the mapping in place can have security implications. The mapping is present in the userspace page tables and Meltdown-like attacks can read these freed and possibly reused pages. It's relatively simple to fix: unmap the old LDT and flush TLB before freeing the old LDT memory. This further allows to avoid flushing the TLB in map_ldt_struct() as the slot is unmapped and flushed by unmap_ldt_struct() or has never been mapped at all. [ tglx: Massaged changelog and removed the needless line breaks ] Fixes: f55f0501cbf6 ("x86/pti: Put the LDT in its own PGD if PTI is on") Signed-off-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com> Signed-off-by: Thomas Gleixner <tglx@linutronix.de> Cc: bp@alien8.de Cc: hpa@zytor.com Cc: dave.hansen@linux.intel.com Cc: luto@kernel.org Cc: peterz@infradead.org Cc: boris.ostrovsky@oracle.com Cc: jgross@suse.com Cc: bhe@redhat.com Cc: willy@infradead.org Cc: linux-mm@kvack.org Cc: stable@vger.kernel.org Link: https://lkml.kernel.org/r/20181026122856.66224-3-kirill.shutemov@linux.intel.com --- arch/x86/kernel/ldt.c | 51 ++++++++++++++++++++++++++++++++----------- 1 file changed, 38 insertions(+), 13 deletions(-) diff --git a/arch/x86/kernel/ldt.c b/arch/x86/kernel/ldt.c index ab18e0884dc6f..18e4525c5933e 100644 --- a/arch/x86/kernel/ldt.c +++ b/arch/x86/kernel/ldt.c @@ -199,14 +199,6 @@ static void sanity_check_ldt_mapping(struct mm_struct *mm) /* * If PTI is enabled, this maps the LDT into the kernelmode and * usermode tables for the given mm. - * - * There is no corresponding unmap function. Even if the LDT is freed, we - * leave the PTEs around until the slot is reused or the mm is destroyed. - * This is harmless: the LDT is always in ordinary memory, and no one will - * access the freed slot. - * - * If we wanted to unmap freed LDTs, we'd also need to do a flush to make - * it useful, and the flush would slow down modify_ldt(). */ static int map_ldt_struct(struct mm_struct *mm, struct ldt_struct *ldt, int slot) @@ -214,8 +206,8 @@ map_ldt_struct(struct mm_struct *mm, struct ldt_struct *ldt, int slot) unsigned long va; bool is_vmalloc; spinlock_t *ptl; + int i, nr_pages; pgd_t *pgd; - int i; if (!static_cpu_has(X86_FEATURE_PTI)) return 0; @@ -238,7 +230,9 @@ map_ldt_struct(struct mm_struct *mm, struct ldt_struct *ldt, int slot) is_vmalloc = is_vmalloc_addr(ldt->entries); - for (i = 0; i * PAGE_SIZE < ldt->nr_entries * LDT_ENTRY_SIZE; i++) { + nr_pages = DIV_ROUND_UP(ldt->nr_entries * LDT_ENTRY_SIZE, PAGE_SIZE); + + for (i = 0; i < nr_pages; i++) { unsigned long offset = i << PAGE_SHIFT; const void *src = (char *)ldt->entries + offset; unsigned long pfn; @@ -272,13 +266,39 @@ map_ldt_struct(struct mm_struct *mm, struct ldt_struct *ldt, int slot) /* Propagate LDT mapping to the user page-table */ map_ldt_struct_to_user(mm); - va = (unsigned long)ldt_slot_va(slot); - flush_tlb_mm_range(mm, va, va + LDT_SLOT_STRIDE, PAGE_SHIFT, false); - ldt->slot = slot; return 0; } +static void unmap_ldt_struct(struct mm_struct *mm, struct ldt_struct *ldt) +{ + unsigned long va; + int i, nr_pages; + + if (!ldt) + return; + + /* LDT map/unmap is only required for PTI */ + if (!static_cpu_has(X86_FEATURE_PTI)) + return; + + nr_pages = DIV_ROUND_UP(ldt->nr_entries * LDT_ENTRY_SIZE, PAGE_SIZE); + + for (i = 0; i < nr_pages; i++) { + unsigned long offset = i << PAGE_SHIFT; + spinlock_t *ptl; + pte_t *ptep; + + va = (unsigned long)ldt_slot_va(ldt->slot) + offset; + ptep = get_locked_pte(mm, va, &ptl); + pte_clear(mm, va, ptep); + pte_unmap_unlock(ptep, ptl); + } + + va = (unsigned long)ldt_slot_va(ldt->slot); + flush_tlb_mm_range(mm, va, va + nr_pages * PAGE_SIZE, PAGE_SHIFT, false); +} + #else /* !CONFIG_PAGE_TABLE_ISOLATION */ static int @@ -286,6 +306,10 @@ map_ldt_struct(struct mm_struct *mm, struct ldt_struct *ldt, int slot) { return 0; } + +static void unmap_ldt_struct(struct mm_struct *mm, struct ldt_struct *ldt) +{ +} #endif /* CONFIG_PAGE_TABLE_ISOLATION */ static void free_ldt_pgtables(struct mm_struct *mm) @@ -524,6 +548,7 @@ static int write_ldt(void __user *ptr, unsigned long bytecount, int oldmode) } install_ldt(mm, new_ldt); + unmap_ldt_struct(mm, old_ldt); free_ldt_struct(old_ldt); error = 0; From b082f2dd80612015cd6d9d84e52099734ec9a0e1 Mon Sep 17 00:00:00 2001 From: "Kirill A. Shutemov" <kirill.shutemov@linux.intel.com> Date: Fri, 26 Oct 2018 15:28:56 +0300 Subject: [PATCH 195/443] x86/ldt: Remove unused variable in map_ldt_struct() Splitting out the sanity check in map_ldt_struct() moved page table syncing into a separate function, which made the pgd variable unused. Remove it. [ tglx: Massaged changelog ] Fixes: 9bae3197e15d ("x86/ldt: Split out sanity check in map_ldt_struct()") Signed-off-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com> Signed-off-by: Thomas Gleixner <tglx@linutronix.de> Reviewed-by: Andy Lutomirski <luto@kernel.org> Cc: bp@alien8.de Cc: hpa@zytor.com Cc: dave.hansen@linux.intel.com Cc: peterz@infradead.org Cc: boris.ostrovsky@oracle.com Cc: jgross@suse.com Cc: bhe@redhat.com Cc: willy@infradead.org Cc: linux-mm@kvack.org Cc: stable@vger.kernel.org Link: https://lkml.kernel.org/r/20181026122856.66224-4-kirill.shutemov@linux.intel.com --- arch/x86/kernel/ldt.c | 8 -------- 1 file changed, 8 deletions(-) diff --git a/arch/x86/kernel/ldt.c b/arch/x86/kernel/ldt.c index 18e4525c5933e..6135ae8ce0364 100644 --- a/arch/x86/kernel/ldt.c +++ b/arch/x86/kernel/ldt.c @@ -207,7 +207,6 @@ map_ldt_struct(struct mm_struct *mm, struct ldt_struct *ldt, int slot) bool is_vmalloc; spinlock_t *ptl; int i, nr_pages; - pgd_t *pgd; if (!static_cpu_has(X86_FEATURE_PTI)) return 0; @@ -221,13 +220,6 @@ map_ldt_struct(struct mm_struct *mm, struct ldt_struct *ldt, int slot) /* Check if the current mappings are sane */ sanity_check_ldt_mapping(mm); - /* - * Did we already have the top level entry allocated? We can't - * use pgd_none() for this because it doens't do anything on - * 4-level page table kernels. - */ - pgd = pgd_offset(mm, LDT_BASE_ADDR); - is_vmalloc = is_vmalloc_addr(ldt->entries); nr_pages = DIV_ROUND_UP(ldt->nr_entries * LDT_ENTRY_SIZE, PAGE_SIZE); From a48777fdda7d13179979a889e1fb87655a783cc0 Mon Sep 17 00:00:00 2001 From: Eial Czerwacki <eial@scalemp.com> Date: Mon, 5 Nov 2018 19:31:54 +0200 Subject: [PATCH 196/443] x86/vsmp: Remove dependency on pv_irq_ops vSMP dependency on pv_irq_ops has been removed some years ago, but the code still deals with pv_irq_ops. In short, "cap & ctl & (1 << 4)" is always returning 0, so all PARAVIRT/PARAVIRT_XXL code related to that can be removed. However, the rest of the code depends on CONFIG_PCI, so fix it accordingly. Rename set_vsmp_pv_ops to set_vsmp_ctl as the original name does not make sense anymore. Signed-off-by: Eial Czerwacki <eial@scalemp.com> Signed-off-by: Thomas Gleixner <tglx@linutronix.de> Acked-by: Shai Fultheim <shai@scalemp.com> Cc: Juergen Gross <jgross@suse.com> Link: https://lkml.kernel.org/r/1541439114-28297-1-git-send-email-eial@scalemp.com --- arch/x86/Kconfig | 1 - arch/x86/kernel/vsmp_64.c | 84 ++++----------------------------------- 2 files changed, 7 insertions(+), 78 deletions(-) diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index ba7e3464ee923..9d734f3c8234d 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -525,7 +525,6 @@ config X86_VSMP bool "ScaleMP vSMP" select HYPERVISOR_GUEST select PARAVIRT - select PARAVIRT_XXL depends on X86_64 && PCI depends on X86_EXTENDED_PLATFORM depends on SMP diff --git a/arch/x86/kernel/vsmp_64.c b/arch/x86/kernel/vsmp_64.c index 1eae5af491c27..891a75dbc1313 100644 --- a/arch/x86/kernel/vsmp_64.c +++ b/arch/x86/kernel/vsmp_64.c @@ -26,65 +26,8 @@ #define TOPOLOGY_REGISTER_OFFSET 0x10 -#if defined CONFIG_PCI && defined CONFIG_PARAVIRT_XXL -/* - * Interrupt control on vSMPowered systems: - * ~AC is a shadow of IF. If IF is 'on' AC should be 'off' - * and vice versa. - */ - -asmlinkage __visible unsigned long vsmp_save_fl(void) -{ - unsigned long flags = native_save_fl(); - - if (!(flags & X86_EFLAGS_IF) || (flags & X86_EFLAGS_AC)) - flags &= ~X86_EFLAGS_IF; - return flags; -} -PV_CALLEE_SAVE_REGS_THUNK(vsmp_save_fl); - -__visible void vsmp_restore_fl(unsigned long flags) -{ - if (flags & X86_EFLAGS_IF) - flags &= ~X86_EFLAGS_AC; - else - flags |= X86_EFLAGS_AC; - native_restore_fl(flags); -} -PV_CALLEE_SAVE_REGS_THUNK(vsmp_restore_fl); - -asmlinkage __visible void vsmp_irq_disable(void) -{ - unsigned long flags = native_save_fl(); - - native_restore_fl((flags & ~X86_EFLAGS_IF) | X86_EFLAGS_AC); -} -PV_CALLEE_SAVE_REGS_THUNK(vsmp_irq_disable); - -asmlinkage __visible void vsmp_irq_enable(void) -{ - unsigned long flags = native_save_fl(); - - native_restore_fl((flags | X86_EFLAGS_IF) & (~X86_EFLAGS_AC)); -} -PV_CALLEE_SAVE_REGS_THUNK(vsmp_irq_enable); - -static unsigned __init vsmp_patch(u8 type, void *ibuf, - unsigned long addr, unsigned len) -{ - switch (type) { - case PARAVIRT_PATCH(irq.irq_enable): - case PARAVIRT_PATCH(irq.irq_disable): - case PARAVIRT_PATCH(irq.save_fl): - case PARAVIRT_PATCH(irq.restore_fl): - return paravirt_patch_default(type, ibuf, addr, len); - default: - return native_patch(type, ibuf, addr, len); - } - -} - -static void __init set_vsmp_pv_ops(void) +#ifdef CONFIG_PCI +static void __init set_vsmp_ctl(void) { void __iomem *address; unsigned int cap, ctl, cfg; @@ -109,28 +52,12 @@ static void __init set_vsmp_pv_ops(void) } #endif - if (cap & ctl & (1 << 4)) { - /* Setup irq ops and turn on vSMP IRQ fastpath handling */ - pv_ops.irq.irq_disable = PV_CALLEE_SAVE(vsmp_irq_disable); - pv_ops.irq.irq_enable = PV_CALLEE_SAVE(vsmp_irq_enable); - pv_ops.irq.save_fl = PV_CALLEE_SAVE(vsmp_save_fl); - pv_ops.irq.restore_fl = PV_CALLEE_SAVE(vsmp_restore_fl); - pv_ops.init.patch = vsmp_patch; - ctl &= ~(1 << 4); - } writel(ctl, address + 4); ctl = readl(address + 4); pr_info("vSMP CTL: control set to:0x%08x\n", ctl); early_iounmap(address, 8); } -#else -static void __init set_vsmp_pv_ops(void) -{ -} -#endif - -#ifdef CONFIG_PCI static int is_vsmp = -1; static void __init detect_vsmp_box(void) @@ -164,11 +91,14 @@ static int is_vsmp_box(void) { return 0; } +static void __init set_vsmp_ctl(void) +{ +} #endif static void __init vsmp_cap_cpus(void) { -#if !defined(CONFIG_X86_VSMP) && defined(CONFIG_SMP) +#if !defined(CONFIG_X86_VSMP) && defined(CONFIG_SMP) && defined(CONFIG_PCI) void __iomem *address; unsigned int cfg, topology, node_shift, maxcpus; @@ -221,6 +151,6 @@ void __init vsmp_init(void) vsmp_cap_cpus(); - set_vsmp_pv_ops(); + set_vsmp_ctl(); return; } From 86a484bda787d542d4a968bd7742bcf844c8adb2 Mon Sep 17 00:00:00 2001 From: Leo Li <sunpeng.li@amd.com> Date: Tue, 30 Oct 2018 15:09:08 -0400 Subject: [PATCH 197/443] drm/amd: Update atom_smu_info_v3_3 structure Mainly adding the WAFL spread spectrum info, for adjusting display clocks when XGMI is enabled. Signed-off-by: Leo Li <sunpeng.li@amd.com> Acked-by: Alex Deucher <alexander.deucher@amd.com> Signed-off-by: Alex Deucher <alexander.deucher@amd.com> --- drivers/gpu/drm/amd/include/atomfirmware.h | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/include/atomfirmware.h b/drivers/gpu/drm/amd/include/atomfirmware.h index d2e7c0fa96c2f..8eb0bb241210b 100644 --- a/drivers/gpu/drm/amd/include/atomfirmware.h +++ b/drivers/gpu/drm/amd/include/atomfirmware.h @@ -1325,7 +1325,7 @@ struct atom_smu_info_v3_3 { struct atom_common_table_header table_header; uint8_t smuip_min_ver; uint8_t smuip_max_ver; - uint8_t smu_rsd1; + uint8_t waflclk_ss_mode; uint8_t gpuclk_ss_mode; uint16_t sclk_ss_percentage; uint16_t sclk_ss_rate_10hz; @@ -1355,7 +1355,10 @@ struct atom_smu_info_v3_3 { uint32_t syspll3_1_vco_freq_10khz; uint32_t bootup_fclk_10khz; uint32_t bootup_waflclk_10khz; - uint32_t reserved[3]; + uint32_t smu_info_caps; + uint16_t waflclk_ss_percentage; // in unit of 0.001% + uint16_t smuinitoffset; + uint32_t reserved; }; /* From ce317dd9f809c8da9656c88761e30f0a82a8c2e6 Mon Sep 17 00:00:00 2001 From: Anirudh Venkataramanan <anirudh.venkataramanan@intel.com> Date: Fri, 26 Oct 2018 10:40:50 -0700 Subject: [PATCH 198/443] ice: Set carrier state and start/stop queues in rebuild Set the carrier state post rebuild by querying the link status. Also start/stop queues based on link status. Signed-off-by: Anirudh Venkataramanan <anirudh.venkataramanan@intel.com> Tested-by: Andrew Bowers <andrewx.bowers@intel.com> Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com> --- drivers/net/ethernet/intel/ice/ice_main.c | 18 +++++++++++++++++- 1 file changed, 17 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c index 05993451147a0..6d31ffb649405 100644 --- a/drivers/net/ethernet/intel/ice/ice_main.c +++ b/drivers/net/ethernet/intel/ice/ice_main.c @@ -3296,7 +3296,7 @@ static void ice_rebuild(struct ice_pf *pf) struct device *dev = &pf->pdev->dev; struct ice_hw *hw = &pf->hw; enum ice_status ret; - int err; + int err, i; if (test_bit(__ICE_DOWN, pf->state)) goto clear_recovery; @@ -3370,6 +3370,22 @@ static void ice_rebuild(struct ice_pf *pf) } ice_reset_all_vfs(pf, true); + + for (i = 0; i < pf->num_alloc_vsi; i++) { + bool link_up; + + if (!pf->vsi[i] || pf->vsi[i]->type != ICE_VSI_PF) + continue; + ice_get_link_status(pf->vsi[i]->port_info, &link_up); + if (link_up) { + netif_carrier_on(pf->vsi[i]->netdev); + netif_tx_wake_all_queues(pf->vsi[i]->netdev); + } else { + netif_carrier_off(pf->vsi[i]->netdev); + netif_tx_stop_all_queues(pf->vsi[i]->netdev); + } + } + /* if we get here, reset flow is successful */ clear_bit(__ICE_RESET_FAILED, pf->state); return; From afd9d4ab58db20029a75cf82f23b6a5641cd7d6f Mon Sep 17 00:00:00 2001 From: Anirudh Venkataramanan <anirudh.venkataramanan@intel.com> Date: Fri, 26 Oct 2018 10:40:51 -0700 Subject: [PATCH 199/443] ice: Check for reset in progress during remove The remove path does not currently check to see if a reset is in progress before proceeding. This can cause a resource collision resulting in various types of errors. Check for reset in progress and wait for a reasonable amount of time before allowing the remove to progress. Signed-off-by: Dave Ertman <david.m.ertman@intel.com> Signed-off-by: Anirudh Venkataramanan <anirudh.venkataramanan@intel.com> Tested-by: Andrew Bowers <andrewx.bowers@intel.com> Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com> --- drivers/net/ethernet/intel/ice/ice.h | 2 ++ drivers/net/ethernet/intel/ice/ice_main.c | 6 ++++++ 2 files changed, 8 insertions(+) diff --git a/drivers/net/ethernet/intel/ice/ice.h b/drivers/net/ethernet/intel/ice/ice.h index 4c4b5717a627d..e5b37fa608844 100644 --- a/drivers/net/ethernet/intel/ice/ice.h +++ b/drivers/net/ethernet/intel/ice/ice.h @@ -76,6 +76,8 @@ extern const char ice_drv_ver[]; #define ICE_MIN_INTR_PER_VF (ICE_MIN_QS_PER_VF + 1) #define ICE_DFLT_INTR_PER_VF (ICE_DFLT_QS_PER_VF + 1) +#define ICE_MAX_RESET_WAIT 20 + #define ICE_VSIQF_HKEY_ARRAY_SIZE ((VSIQF_HKEY_MAX_INDEX + 1) * 4) #define ICE_DFLT_NETIF_M (NETIF_MSG_DRV | NETIF_MSG_PROBE | NETIF_MSG_LINK) diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c index 6d31ffb649405..aee22f11a41aa 100644 --- a/drivers/net/ethernet/intel/ice/ice_main.c +++ b/drivers/net/ethernet/intel/ice/ice_main.c @@ -2182,6 +2182,12 @@ static void ice_remove(struct pci_dev *pdev) if (!pf) return; + for (i = 0; i < ICE_MAX_RESET_WAIT; i++) { + if (!ice_is_reset_in_progress(pf->state)) + break; + msleep(100); + } + set_bit(__ICE_DOWN, pf->state); ice_service_task_stop(pf); From 0f5d4c21a50716f8bd4e220544b82dca7408d113 Mon Sep 17 00:00:00 2001 From: Akeem G Abodunrin <akeem.g.abodunrin@intel.com> Date: Fri, 26 Oct 2018 10:40:52 -0700 Subject: [PATCH 200/443] ice: Fix dead device link issue with flow control Setting Rx or Tx pause parameter currently results in link loss on the interface, requiring the platform/host to be cold power cycled. Fix it. Signed-off-by: Akeem G Abodunrin <akeem.g.abodunrin@intel.com> Signed-off-by: Anirudh Venkataramanan <anirudh.venkataramanan@intel.com> Tested-by: Andrew Bowers <andrewx.bowers@intel.com> Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com> --- drivers/net/ethernet/intel/ice/ice_ethtool.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/intel/ice/ice_ethtool.c b/drivers/net/ethernet/intel/ice/ice_ethtool.c index 96923580f2a6c..648acdb4c644b 100644 --- a/drivers/net/ethernet/intel/ice/ice_ethtool.c +++ b/drivers/net/ethernet/intel/ice/ice_ethtool.c @@ -1517,10 +1517,15 @@ ice_set_pauseparam(struct net_device *netdev, struct ethtool_pauseparam *pause) } if (!test_bit(__ICE_DOWN, pf->state)) { - /* Give it a little more time to try to come back */ + /* Give it a little more time to try to come back. If still + * down, restart autoneg link or reinitialize the interface. + */ msleep(75); if (!test_bit(__ICE_DOWN, pf->state)) return ice_nway_reset(netdev); + + ice_down(vsi); + ice_up(vsi); } return err; From 33e055fcc26909b1d66b5d1f334aee38356068d7 Mon Sep 17 00:00:00 2001 From: Victor Raj <victor.raj@intel.com> Date: Fri, 26 Oct 2018 10:40:53 -0700 Subject: [PATCH 201/443] ice: Free VSI contexts during for unload In the unload path, all VSIs are freed. Also free the related VSI contexts to prevent memory leaks. Signed-off-by: Victor Raj <victor.raj@intel.com> Signed-off-by: Anirudh Venkataramanan <anirudh.venkataramanan@intel.com> Tested-by: Andrew Bowers <andrewx.bowers@intel.com> Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com> --- drivers/net/ethernet/intel/ice/ice_common.c | 3 +++ drivers/net/ethernet/intel/ice/ice_switch.c | 12 ++++++++++++ drivers/net/ethernet/intel/ice/ice_switch.h | 2 ++ 3 files changed, 17 insertions(+) diff --git a/drivers/net/ethernet/intel/ice/ice_common.c b/drivers/net/ethernet/intel/ice/ice_common.c index 8cd6a2401fd9f..554fd707a6d69 100644 --- a/drivers/net/ethernet/intel/ice/ice_common.c +++ b/drivers/net/ethernet/intel/ice/ice_common.c @@ -811,6 +811,9 @@ void ice_deinit_hw(struct ice_hw *hw) /* Attempt to disable FW logging before shutting down control queues */ ice_cfg_fw_log(hw, false); ice_shutdown_all_ctrlq(hw); + + /* Clear VSI contexts if not already cleared */ + ice_clear_all_vsi_ctx(hw); } /** diff --git a/drivers/net/ethernet/intel/ice/ice_switch.c b/drivers/net/ethernet/intel/ice/ice_switch.c index 33403f39f1b3f..40c9c65589568 100644 --- a/drivers/net/ethernet/intel/ice/ice_switch.c +++ b/drivers/net/ethernet/intel/ice/ice_switch.c @@ -347,6 +347,18 @@ static void ice_clear_vsi_ctx(struct ice_hw *hw, u16 vsi_handle) } } +/** + * ice_clear_all_vsi_ctx - clear all the VSI context entries + * @hw: pointer to the hw struct + */ +void ice_clear_all_vsi_ctx(struct ice_hw *hw) +{ + u16 i; + + for (i = 0; i < ICE_MAX_VSI; i++) + ice_clear_vsi_ctx(hw, i); +} + /** * ice_add_vsi - add VSI context to the hardware and VSI handle list * @hw: pointer to the hw struct diff --git a/drivers/net/ethernet/intel/ice/ice_switch.h b/drivers/net/ethernet/intel/ice/ice_switch.h index b88d96a1ef693..d5ef0bd58bf97 100644 --- a/drivers/net/ethernet/intel/ice/ice_switch.h +++ b/drivers/net/ethernet/intel/ice/ice_switch.h @@ -190,6 +190,8 @@ ice_update_vsi(struct ice_hw *hw, u16 vsi_handle, struct ice_vsi_ctx *vsi_ctx, struct ice_sq_cd *cd); bool ice_is_vsi_valid(struct ice_hw *hw, u16 vsi_handle); struct ice_vsi_ctx *ice_get_vsi_ctx(struct ice_hw *hw, u16 vsi_handle); +void ice_clear_all_vsi_ctx(struct ice_hw *hw); +/* Switch config */ enum ice_status ice_get_initial_sw_cfg(struct ice_hw *hw); /* Switch/bridge related commands */ From 9ecd25c26810a61b9c3abc6c73de32dca6da96e1 Mon Sep 17 00:00:00 2001 From: Anirudh Venkataramanan <anirudh.venkataramanan@intel.com> Date: Fri, 26 Oct 2018 10:40:54 -0700 Subject: [PATCH 202/443] ice: Remove duplicate addition of VLANs in replay path ice_restore_vlan and active_vlans were originally put in place to reprogram VLAN filters in the replay path. This is now done as part of the much broader VSI rebuild/replay framework. So remove both ice_restore_vlan and active_vlans Signed-off-by: Anirudh Venkataramanan <anirudh.venkataramanan@intel.com> Tested-by: Andrew Bowers <andrewx.bowers@intel.com> Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com> --- drivers/net/ethernet/intel/ice/ice.h | 1 - drivers/net/ethernet/intel/ice/ice_main.c | 42 +++---------------- .../net/ethernet/intel/ice/ice_virtchnl_pf.c | 2 - 3 files changed, 6 insertions(+), 39 deletions(-) diff --git a/drivers/net/ethernet/intel/ice/ice.h b/drivers/net/ethernet/intel/ice/ice.h index e5b37fa608844..1639e955f1580 100644 --- a/drivers/net/ethernet/intel/ice/ice.h +++ b/drivers/net/ethernet/intel/ice/ice.h @@ -191,7 +191,6 @@ struct ice_vsi { u64 tx_linearize; DECLARE_BITMAP(state, __ICE_STATE_NBITS); DECLARE_BITMAP(flags, ICE_VSI_FLAG_NBITS); - unsigned long active_vlans[BITS_TO_LONGS(VLAN_N_VID)]; unsigned int current_netdev_flags; u32 tx_restart; u32 tx_busy; diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c index aee22f11a41aa..338abb1b92338 100644 --- a/drivers/net/ethernet/intel/ice/ice_main.c +++ b/drivers/net/ethernet/intel/ice/ice_main.c @@ -1622,7 +1622,6 @@ static int ice_vlan_rx_add_vid(struct net_device *netdev, { struct ice_netdev_priv *np = netdev_priv(netdev); struct ice_vsi *vsi = np->vsi; - int ret; if (vid >= VLAN_N_VID) { netdev_err(netdev, "VLAN id requested %d is out of range %d\n", @@ -1635,7 +1634,8 @@ static int ice_vlan_rx_add_vid(struct net_device *netdev, /* Enable VLAN pruning when VLAN 0 is added */ if (unlikely(!vid)) { - ret = ice_cfg_vlan_pruning(vsi, true); + int ret = ice_cfg_vlan_pruning(vsi, true); + if (ret) return ret; } @@ -1644,12 +1644,7 @@ static int ice_vlan_rx_add_vid(struct net_device *netdev, * needed to continue allowing all untagged packets since VLAN prune * list is applied to all packets by the switch */ - ret = ice_vsi_add_vlan(vsi, vid); - - if (!ret) - set_bit(vid, vsi->active_vlans); - - return ret; + return ice_vsi_add_vlan(vsi, vid); } /** @@ -1677,8 +1672,6 @@ static int ice_vlan_rx_kill_vid(struct net_device *netdev, if (status) return status; - clear_bit(vid, vsi->active_vlans); - /* Disable VLAN pruning when VLAN 0 is removed */ if (unlikely(!vid)) status = ice_cfg_vlan_pruning(vsi, false); @@ -2515,31 +2508,6 @@ static int ice_vsi_vlan_setup(struct ice_vsi *vsi) return ret; } -/** - * ice_restore_vlan - Reinstate VLANs when vsi/netdev comes back up - * @vsi: the VSI being brought back up - */ -static int ice_restore_vlan(struct ice_vsi *vsi) -{ - int err; - u16 vid; - - if (!vsi->netdev) - return -EINVAL; - - err = ice_vsi_vlan_setup(vsi); - if (err) - return err; - - for_each_set_bit(vid, vsi->active_vlans, VLAN_N_VID) { - err = ice_vlan_rx_add_vid(vsi->netdev, htons(ETH_P_8021Q), vid); - if (err) - break; - } - - return err; -} - /** * ice_vsi_cfg - Setup the VSI * @vsi: the VSI being configured @@ -2552,7 +2520,9 @@ static int ice_vsi_cfg(struct ice_vsi *vsi) if (vsi->netdev) { ice_set_rx_mode(vsi->netdev); - err = ice_restore_vlan(vsi); + + err = ice_vsi_vlan_setup(vsi); + if (err) return err; } diff --git a/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c b/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c index 45f10f8f01dc1..9576b958622b3 100644 --- a/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c +++ b/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c @@ -2171,7 +2171,6 @@ static int ice_vc_process_vlan_msg(struct ice_vf *vf, u8 *msg, bool add_v) if (!ice_vsi_add_vlan(vsi, vid)) { vf->num_vlan++; - set_bit(vid, vsi->active_vlans); /* Enable VLAN pruning when VLAN 0 is added */ if (unlikely(!vid)) @@ -2190,7 +2189,6 @@ static int ice_vc_process_vlan_msg(struct ice_vf *vf, u8 *msg, bool add_v) */ if (!ice_vsi_kill_vlan(vsi, vid)) { vf->num_vlan--; - clear_bit(vid, vsi->active_vlans); /* Disable VLAN pruning when removing VLAN 0 */ if (unlikely(!vid)) From 58297dd133f64ea028e7d52dd00cd9ff8aa4479f Mon Sep 17 00:00:00 2001 From: Md Fahad Iqbal Polash <md.fahad.iqbal.polash@intel.com> Date: Fri, 26 Oct 2018 10:40:55 -0700 Subject: [PATCH 203/443] ice: Fix flags for port VLAN According to the spec, whenever insert PVID field is set, the VLAN driver insertion mode should be set to 01b which isn't done currently. Fix it. Signed-off-by: Md Fahad Iqbal Polash <md.fahad.iqbal.polash@intel.com> Signed-off-by: Anirudh Venkataramanan <anirudh.venkataramanan@intel.com> Tested-by: Andrew Bowers <andrewx.bowers@intel.com> Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com> --- drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c b/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c index 9576b958622b3..e71065f9d3918 100644 --- a/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c +++ b/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c @@ -348,7 +348,7 @@ static int ice_vsi_set_pvid(struct ice_vsi *vsi, u16 vid) struct ice_vsi_ctx ctxt = { 0 }; enum ice_status status; - ctxt.info.vlan_flags = ICE_AQ_VSI_VLAN_MODE_TAGGED | + ctxt.info.vlan_flags = ICE_AQ_VSI_VLAN_MODE_UNTAGGED | ICE_AQ_VSI_PVLAN_INSERT_PVID | ICE_AQ_VSI_VLAN_EMOD_STR; ctxt.info.pvid = cpu_to_le16(vid); From 31082519c11b01fe1fb6dd512055f252812c1508 Mon Sep 17 00:00:00 2001 From: Anirudh Venkataramanan <anirudh.venkataramanan@intel.com> Date: Fri, 26 Oct 2018 10:40:56 -0700 Subject: [PATCH 204/443] ice: Fix typo in error message Print should say "Enabling" instead of "Enaabling" Signed-off-by: Akeem G Abodunrin <akeem.g.abodunrin@intel.com> Signed-off-by: Anirudh Venkataramanan <anirudh.venkataramanan@intel.com> Tested-by: Andrew Bowers <andrewx.bowers@intel.com> Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com> --- drivers/net/ethernet/intel/ice/ice_lib.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/intel/ice/ice_lib.c b/drivers/net/ethernet/intel/ice/ice_lib.c index 5bacad01f0c9c..c604a44c8cfbd 100644 --- a/drivers/net/ethernet/intel/ice/ice_lib.c +++ b/drivers/net/ethernet/intel/ice/ice_lib.c @@ -1997,7 +1997,7 @@ int ice_cfg_vlan_pruning(struct ice_vsi *vsi, bool ena) status = ice_update_vsi(&vsi->back->hw, vsi->idx, ctxt, NULL); if (status) { netdev_err(vsi->netdev, "%sabling VLAN pruning on VSI handle: %d, VSI HW ID: %d failed, err = %d, aq_err = %d\n", - ena ? "Ena" : "Dis", vsi->idx, vsi->vsi_num, status, + ena ? "En" : "Dis", vsi->idx, vsi->vsi_num, status, vsi->back->hw.adminq.sq_last_status); goto err_out; } From 25525b69bb44a628841492f44a5a8e74f34724f4 Mon Sep 17 00:00:00 2001 From: Dave Ertman <david.m.ertman@intel.com> Date: Fri, 26 Oct 2018 10:40:57 -0700 Subject: [PATCH 205/443] ice: Fix napi delete calls for remove In the remove path, the vsi->netdev is being set to NULL before the call to free vectors. This is causing the netif_napi_del call to never be made. Add a call to ice_napi_del to the same location as the calls to unregister_netdev and just prior to them. This will use the reverse flow as the register and netif_napi_add calls. Signed-off-by: Dave Ertman <david.m.ertman@intel.com> Signed-off-by: Anirudh Venkataramanan <anirudh.venkataramanan@intel.com> Tested-by: Andrew Bowers <andrewx.bowers@intel.com> Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com> --- drivers/net/ethernet/intel/ice/ice.h | 1 + drivers/net/ethernet/intel/ice/ice_lib.c | 1 + drivers/net/ethernet/intel/ice/ice_main.c | 2 +- 3 files changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/intel/ice/ice.h b/drivers/net/ethernet/intel/ice/ice.h index 1639e955f1580..b8548370f1c72 100644 --- a/drivers/net/ethernet/intel/ice/ice.h +++ b/drivers/net/ethernet/intel/ice/ice.h @@ -370,5 +370,6 @@ int ice_set_rss(struct ice_vsi *vsi, u8 *seed, u8 *lut, u16 lut_size); int ice_get_rss(struct ice_vsi *vsi, u8 *seed, u8 *lut, u16 lut_size); void ice_fill_rss_lut(u8 *lut, u16 rss_table_size, u16 rss_size); void ice_print_link_msg(struct ice_vsi *vsi, bool isup); +void ice_napi_del(struct ice_vsi *vsi); #endif /* _ICE_H_ */ diff --git a/drivers/net/ethernet/intel/ice/ice_lib.c b/drivers/net/ethernet/intel/ice/ice_lib.c index c604a44c8cfbd..1041fa2a77678 100644 --- a/drivers/net/ethernet/intel/ice/ice_lib.c +++ b/drivers/net/ethernet/intel/ice/ice_lib.c @@ -2458,6 +2458,7 @@ int ice_vsi_release(struct ice_vsi *vsi) * on this wq */ if (vsi->netdev && !ice_is_reset_in_progress(pf->state)) { + ice_napi_del(vsi); unregister_netdev(vsi->netdev); free_netdev(vsi->netdev); vsi->netdev = NULL; diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c index 338abb1b92338..82f49dbd762c6 100644 --- a/drivers/net/ethernet/intel/ice/ice_main.c +++ b/drivers/net/ethernet/intel/ice/ice_main.c @@ -1465,7 +1465,7 @@ static int ice_req_irq_msix_misc(struct ice_pf *pf) * ice_napi_del - Remove NAPI handler for the VSI * @vsi: VSI for which NAPI handler is to be removed */ -static void ice_napi_del(struct ice_vsi *vsi) +void ice_napi_del(struct ice_vsi *vsi) { int v_idx; From c585ea42ec75e8d3afa278b7095d9f0dd6ee515b Mon Sep 17 00:00:00 2001 From: Brett Creeley <brett.creeley@intel.com> Date: Fri, 26 Oct 2018 10:40:58 -0700 Subject: [PATCH 206/443] ice: Fix tx_timeout in PF driver Prior to this commit the driver was running into tx_timeouts when a queue was stressed enough. This was happening because the HW tail and SW tail (NTU) were incorrectly out of sync. Consequently this was causing the HW head to collide with the HW tail, which to the hardware means that all descriptors posted for Tx have been processed. Due to the Tx logic used in the driver SW tail and HW tail are allowed to be out of sync. This is done as an optimization because it allows the driver to write HW tail as infrequently as possible, while still updating the SW tail index to keep track. However, there are situations where this results in the tail never getting updated, resulting in Tx timeouts. Tx HW tail write condition: if (netif_xmit_stopped(txring_txq(tx_ring) || !skb->xmit_more) writel(sw_tail, tx_ring->tail); An issue was found in the Tx logic that was causing the afore mentioned condition for updating HW tail to never happen, causing tx_timeouts. In ice_xmit_frame_ring we calculate how many descriptors we need for the Tx transaction based on the skb the kernel hands us. This is then passed into ice_maybe_stop_tx along with some extra padding to determine if we have enough descriptors available for this transaction. If we don't then we return -EBUSY to the stack, otherwise we move on and eventually prepare the Tx descriptors accordingly in ice_tx_map and set next_to_watch. In ice_tx_map we make another call to ice_maybe_stop_tx with a value of MAX_SKB_FRAGS + 4. The key here is that this value is possibly less than the value we sent in the first call to ice_maybe_stop_tx in ice_xmit_frame_ring. Now, if the number of unused descriptors is between MAX_SKB_FRAGS + 4 and the value used in the first call to ice_maybe_stop_tx in ice_xmit_frame_ring then we do not update the HW tail because of the "Tx HW tail write condition" above. This is because in ice_maybe_stop_tx we return success from ice_maybe_stop_tx instead of calling __ice_maybe_stop_tx and subsequently calling netif_stop_subqueue, which sets the __QUEUE_STATE_DEV_XOFF bit. This bit is then checked in the "Tx HW tail write condition" by calling netif_xmit_stopped and subsequently updating HW tail if the afore mentioned bit is set. In ice_clean_tx_irq, if next_to_watch is not NULL, we end up cleaning the descriptors that HW sets the DD bit on and we have the budget. The HW head will eventually run into the HW tail in response to the description in the paragraph above. The next time through ice_xmit_frame_ring we make the initial call to ice_maybe_stop_tx with another skb from the stack. This time we do not have enough descriptors available and we return NETDEV_TX_BUSY to the stack and end up setting next_to_watch to NULL. This is where we are stuck. In ice_clean_tx_irq we never clean anything because next_to_watch is always NULL and in ice_xmit_frame_ring we never update HW tail because we already return NETDEV_TX_BUSY to the stack and eventually we hit a tx_timeout. This issue was fixed by making sure that the second call to ice_maybe_stop_tx in ice_tx_map is passed a value that is >= the value that was used on the initial call to ice_maybe_stop_tx in ice_xmit_frame_ring. This was done by adding the following defines to make the logic more clear and to reduce the chance of mucking this up again: ICE_CACHE_LINE_BYTES 64 ICE_DESCS_PER_CACHE_LINE (ICE_CACHE_LINE_BYTES / \ sizeof(struct ice_tx_desc)) ICE_DESCS_FOR_CTX_DESC 1 ICE_DESCS_FOR_SKB_DATA_PTR 1 The ICE_CACHE_LINE_BYTES being 64 is an assumption being made so we don't have to figure this out on every pass through the Tx path. Instead I added a sanity check in ice_probe to verify cache line size and print a message if it's not 64 Bytes. This will make it easier to file issues if they are seen when the cache line size is not 64 Bytes when reading from the GLPCI_CNF2 register. Signed-off-by: Brett Creeley <brett.creeley@intel.com> Signed-off-by: Anirudh Venkataramanan <anirudh.venkataramanan@intel.com> Tested-by: Andrew Bowers <andrewx.bowers@intel.com> Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com> --- .../net/ethernet/intel/ice/ice_hw_autogen.h | 2 ++ drivers/net/ethernet/intel/ice/ice_main.c | 18 ++++++++++++++++++ drivers/net/ethernet/intel/ice/ice_txrx.c | 9 +++++---- drivers/net/ethernet/intel/ice/ice_txrx.h | 17 +++++++++++++++-- 4 files changed, 40 insertions(+), 6 deletions(-) diff --git a/drivers/net/ethernet/intel/ice/ice_hw_autogen.h b/drivers/net/ethernet/intel/ice/ice_hw_autogen.h index 5fdea6ec7675b..596b9fb1c510d 100644 --- a/drivers/net/ethernet/intel/ice/ice_hw_autogen.h +++ b/drivers/net/ethernet/intel/ice/ice_hw_autogen.h @@ -242,6 +242,8 @@ #define GLNVM_ULD 0x000B6008 #define GLNVM_ULD_CORER_DONE_M BIT(3) #define GLNVM_ULD_GLOBR_DONE_M BIT(4) +#define GLPCI_CNF2 0x000BE004 +#define GLPCI_CNF2_CACHELINE_SIZE_M BIT(1) #define PF_FUNC_RID 0x0009E880 #define PF_FUNC_RID_FUNC_NUM_S 0 #define PF_FUNC_RID_FUNC_NUM_M ICE_M(0x7, 0) diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c index 82f49dbd762c6..333312a1d5957 100644 --- a/drivers/net/ethernet/intel/ice/ice_main.c +++ b/drivers/net/ethernet/intel/ice/ice_main.c @@ -1994,6 +1994,22 @@ static int ice_init_interrupt_scheme(struct ice_pf *pf) return 0; } +/** + * ice_verify_cacheline_size - verify driver's assumption of 64 Byte cache lines + * @pf: pointer to the PF structure + * + * There is no error returned here because the driver should be able to handle + * 128 Byte cache lines, so we only print a warning in case issues are seen, + * specifically with Tx. + */ +static void ice_verify_cacheline_size(struct ice_pf *pf) +{ + if (rd32(&pf->hw, GLPCI_CNF2) & GLPCI_CNF2_CACHELINE_SIZE_M) + dev_warn(&pf->pdev->dev, + "%d Byte cache line assumption is invalid, driver may have Tx timeouts!\n", + ICE_CACHE_LINE_BYTES); +} + /** * ice_probe - Device initialization routine * @pdev: PCI device information struct @@ -2144,6 +2160,8 @@ static int ice_probe(struct pci_dev *pdev, /* since everything is good, start the service timer */ mod_timer(&pf->serv_tmr, round_jiffies(jiffies + pf->serv_tmr_period)); + ice_verify_cacheline_size(pf); + return 0; err_alloc_sw_unroll: diff --git a/drivers/net/ethernet/intel/ice/ice_txrx.c b/drivers/net/ethernet/intel/ice/ice_txrx.c index 5dae968d853e1..3387c67c848de 100644 --- a/drivers/net/ethernet/intel/ice/ice_txrx.c +++ b/drivers/net/ethernet/intel/ice/ice_txrx.c @@ -1556,15 +1556,15 @@ int ice_tso(struct ice_tx_buf *first, struct ice_tx_offload_params *off) * magnitude greater than our largest possible GSO size. * * This would then be implemented as: - * return (((size >> 12) * 85) >> 8) + 1; + * return (((size >> 12) * 85) >> 8) + ICE_DESCS_FOR_SKB_DATA_PTR; * * Since multiplication and division are commutative, we can reorder * operations into: - * return ((size * 85) >> 20) + 1; + * return ((size * 85) >> 20) + ICE_DESCS_FOR_SKB_DATA_PTR; */ static unsigned int ice_txd_use_count(unsigned int size) { - return ((size * 85) >> 20) + 1; + return ((size * 85) >> 20) + ICE_DESCS_FOR_SKB_DATA_PTR; } /** @@ -1706,7 +1706,8 @@ ice_xmit_frame_ring(struct sk_buff *skb, struct ice_ring *tx_ring) * + 1 desc for context descriptor, * otherwise try next time */ - if (ice_maybe_stop_tx(tx_ring, count + 4 + 1)) { + if (ice_maybe_stop_tx(tx_ring, count + ICE_DESCS_PER_CACHE_LINE + + ICE_DESCS_FOR_CTX_DESC)) { tx_ring->tx_stats.tx_busy++; return NETDEV_TX_BUSY; } diff --git a/drivers/net/ethernet/intel/ice/ice_txrx.h b/drivers/net/ethernet/intel/ice/ice_txrx.h index 1d0f58bd389bd..75d0eaf6c9ddb 100644 --- a/drivers/net/ethernet/intel/ice/ice_txrx.h +++ b/drivers/net/ethernet/intel/ice/ice_txrx.h @@ -22,8 +22,21 @@ #define ICE_RX_BUF_WRITE 16 /* Must be power of 2 */ #define ICE_MAX_TXQ_PER_TXQG 128 -/* Tx Descriptors needed, worst case */ -#define DESC_NEEDED (MAX_SKB_FRAGS + 4) +/* We are assuming that the cache line is always 64 Bytes here for ice. + * In order to make sure that is a correct assumption there is a check in probe + * to print a warning if the read from GLPCI_CNF2 tells us that the cache line + * size is 128 bytes. We do it this way because we do not want to read the + * GLPCI_CNF2 register or a variable containing the value on every pass through + * the Tx path. + */ +#define ICE_CACHE_LINE_BYTES 64 +#define ICE_DESCS_PER_CACHE_LINE (ICE_CACHE_LINE_BYTES / \ + sizeof(struct ice_tx_desc)) +#define ICE_DESCS_FOR_CTX_DESC 1 +#define ICE_DESCS_FOR_SKB_DATA_PTR 1 +/* Tx descriptors needed, worst case */ +#define DESC_NEEDED (MAX_SKB_FRAGS + ICE_DESCS_FOR_CTX_DESC + \ + ICE_DESCS_PER_CACHE_LINE + ICE_DESCS_FOR_SKB_DATA_PTR) #define ICE_DESC_UNUSED(R) \ ((((R)->next_to_clean > (R)->next_to_use) ? 0 : (R)->count) + \ (R)->next_to_clean - (R)->next_to_use - 1) From d944b46992f8e99b6bdc721e44b02e5ca294fa2b Mon Sep 17 00:00:00 2001 From: Brett Creeley <brett.creeley@intel.com> Date: Fri, 26 Oct 2018 10:40:59 -0700 Subject: [PATCH 207/443] ice: Fix the bytecount sent to netdev_tx_sent_queue Currently if the driver does a TSO offload the bytecount sent to netdev_tx_sent_queue will be incorrect. This is because in ice_tso we overwrite the initial value that we set in ice_tx_map. This creates a mismatch between the Tx and Tx clean flow. In the Tx clean flow we calculate the bytecount (called total_bytes) as we clean the descriptors so the value used in the Tx clean path is correct. Fix this by using += in ice_tso instead of =. This fixes the mismatch in bytecount mentioned above. Signed-off-by: Brett Creeley <brett.creeley@intel.com> Signed-off-by: Anirudh Venkataramanan <anirudh.venkataramanan@intel.com> Tested-by: Andrew Bowers <andrewx.bowers@intel.com> Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com> --- drivers/net/ethernet/intel/ice/ice_txrx.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/intel/ice/ice_txrx.c b/drivers/net/ethernet/intel/ice/ice_txrx.c index 3387c67c848de..fe5bbabbb41ea 100644 --- a/drivers/net/ethernet/intel/ice/ice_txrx.c +++ b/drivers/net/ethernet/intel/ice/ice_txrx.c @@ -1520,7 +1520,7 @@ int ice_tso(struct ice_tx_buf *first, struct ice_tx_offload_params *off) /* update gso_segs and bytecount */ first->gso_segs = skb_shinfo(skb)->gso_segs; - first->bytecount = (first->gso_segs - 1) * off->header_len; + first->bytecount += (first->gso_segs - 1) * off->header_len; cd_tso_len = skb->len - off->header_len; cd_mss = skb_shinfo(skb)->gso_size; From 4c9b658eeaefedd402a59e858d8ac3bfdf6153e3 Mon Sep 17 00:00:00 2001 From: Miroslav Lichvar <mlichvar@redhat.com> Date: Fri, 26 Oct 2018 19:13:00 +0200 Subject: [PATCH 208/443] igb: shorten maximum PHC timecounter update interval The timecounter needs to be updated at least once per ~550 seconds in order to avoid a 40-bit SYSTIM timestamp to be misinterpreted as an old timestamp. Since commit 500462a9de65 ("timers: Switch to a non-cascading wheel"), scheduling of delayed work seems to be less accurate and a requested delay of 540 seconds may actually be longer than 550 seconds. Also, the PHC may be adjusted to run up to 6% faster than real time and the system clock up to 10% slower. Shorten the delay to 360 seconds to be sure the timecounter is updated in time. This fixes an issue with HW timestamps on 82580/I350/I354 being off by ~1100 seconds for few seconds every ~9 minutes. Cc: Thomas Gleixner <tglx@linutronix.de> Signed-off-by: Miroslav Lichvar <mlichvar@redhat.com> Acked-by: Jacob Keller <jacob.e.keller@intel.com> Acked-by: Richard Cochran <richardcochran@gmail.com> Tested-by: Aaron Brown <aaron.f.brown@intel.com> Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com> --- drivers/net/ethernet/intel/igb/igb_ptp.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/drivers/net/ethernet/intel/igb/igb_ptp.c b/drivers/net/ethernet/intel/igb/igb_ptp.c index 29ced6b74d364..2b95dc9c7a6a8 100644 --- a/drivers/net/ethernet/intel/igb/igb_ptp.c +++ b/drivers/net/ethernet/intel/igb/igb_ptp.c @@ -53,13 +53,15 @@ * 2^40 * 10^-9 / 60 = 18.3 minutes. * * SYSTIM is converted to real time using a timecounter. As - * timecounter_cyc2time() allows old timestamps, the timecounter - * needs to be updated at least once per half of the SYSTIM interval. - * Scheduling of delayed work is not very accurate, so we aim for 8 - * minutes to be sure the actual interval is shorter than 9.16 minutes. + * timecounter_cyc2time() allows old timestamps, the timecounter needs + * to be updated at least once per half of the SYSTIM interval. + * Scheduling of delayed work is not very accurate, and also the NIC + * clock can be adjusted to run up to 6% faster and the system clock + * up to 10% slower, so we aim for 6 minutes to be sure the actual + * interval in the NIC time is shorter than 9.16 minutes. */ -#define IGB_SYSTIM_OVERFLOW_PERIOD (HZ * 60 * 8) +#define IGB_SYSTIM_OVERFLOW_PERIOD (HZ * 60 * 6) #define IGB_PTP_TX_TIMEOUT (HZ * 15) #define INCPERIOD_82576 BIT(E1000_TIMINCA_16NS_SHIFT) #define INCVALUE_82576_MASK GENMASK(E1000_TIMINCA_16NS_SHIFT - 1, 0) From 81bd415c91eb966118d773dddf254aebf3022411 Mon Sep 17 00:00:00 2001 From: Mathieu Malaterre <malat@debian.org> Date: Wed, 6 Jun 2018 21:42:32 +0200 Subject: [PATCH 209/443] watchdog/core: Add missing prototypes for weak functions MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The split out of the hard lockup detector exposed two new weak functions, but no prototypes for them, which triggers the build warning: kernel/watchdog.c:109:12: warning: no previous prototype for ‘watchdog_nmi_enable’ [-Wmissing-prototypes] kernel/watchdog.c:115:13: warning: no previous prototype for ‘watchdog_nmi_disable’ [-Wmissing-prototypes] Add the prototypes. Fixes: 73ce0511c436 ("kernel/watchdog.c: move hardlockup detector to separate file") Signed-off-by: Mathieu Malaterre <malat@debian.org> Signed-off-by: Thomas Gleixner <tglx@linutronix.de> Cc: Babu Moger <babu.moger@oracle.com> Cc: stable@vger.kernel.org Link: https://lkml.kernel.org/r/20180606194232.17653-1-malat@debian.org --- include/linux/nmi.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/include/linux/nmi.h b/include/linux/nmi.h index 08f9247e9827e..9003e29cde461 100644 --- a/include/linux/nmi.h +++ b/include/linux/nmi.h @@ -119,6 +119,8 @@ static inline int hardlockup_detector_perf_init(void) { return 0; } void watchdog_nmi_stop(void); void watchdog_nmi_start(void); int watchdog_nmi_probe(void); +int watchdog_nmi_enable(unsigned int cpu); +void watchdog_nmi_disable(unsigned int cpu); /** * touch_nmi_watchdog - restart NMI watchdog timeout. From f348e2241fb73515d65b5d77dd9c174128a7fbf2 Mon Sep 17 00:00:00 2001 From: Vasily Averin <vvs@virtuozzo.com> Date: Tue, 6 Nov 2018 16:16:01 -0500 Subject: [PATCH 210/443] ext4: fix missing cleanup if ext4_alloc_flex_bg_array() fails while resizing Fixes: 117fff10d7f1 ("ext4: grow the s_flex_groups array as needed ...") Signed-off-by: Vasily Averin <vvs@virtuozzo.com> Signed-off-by: Theodore Ts'o <tytso@mit.edu> Cc: stable@kernel.org # 3.7 --- fs/ext4/resize.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/ext4/resize.c b/fs/ext4/resize.c index 3df326ee6d506..5fee65afd58b8 100644 --- a/fs/ext4/resize.c +++ b/fs/ext4/resize.c @@ -2022,7 +2022,7 @@ int ext4_resize_fs(struct super_block *sb, ext4_fsblk_t n_blocks_count) err = ext4_alloc_flex_bg_array(sb, n_group + 1); if (err) - return err; + goto out; err = ext4_mb_alloc_groupinfo(sb, n_group + 1); if (err) From db6aee62406d9fbb53315fcddd81f1dc271d49fa Mon Sep 17 00:00:00 2001 From: Vasily Averin <vvs@virtuozzo.com> Date: Tue, 6 Nov 2018 16:20:40 -0500 Subject: [PATCH 211/443] ext4: fix possible inode leak in the retry loop of ext4_resize_fs() Fixes: 1c6bd7173d66 ("ext4: convert file system to meta_bg if needed ...") Signed-off-by: Vasily Averin <vvs@virtuozzo.com> Signed-off-by: Theodore Ts'o <tytso@mit.edu> Cc: stable@kernel.org # 3.7 --- fs/ext4/resize.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/fs/ext4/resize.c b/fs/ext4/resize.c index 5fee65afd58b8..85158e9de7c23 100644 --- a/fs/ext4/resize.c +++ b/fs/ext4/resize.c @@ -2058,6 +2058,10 @@ int ext4_resize_fs(struct super_block *sb, ext4_fsblk_t n_blocks_count) n_blocks_count_retry = 0; free_flex_gd(flex_gd); flex_gd = NULL; + if (resize_inode) { + iput(resize_inode); + resize_inode = NULL; + } goto retry; } From 63088da9472854408ae5d7c47bd011daf9e1a81b Mon Sep 17 00:00:00 2001 From: Alex Deucher <alexander.deucher@amd.com> Date: Tue, 6 Nov 2018 16:06:32 -0500 Subject: [PATCH 212/443] Revert "drm/amd/display: set backlight level limit to 1" This reverts commit 0cafc82fae41531b0162150f9a97f2c74f97118f. This breaks some apps that assume 0 is minimum brightness. Revert for 4.20. This is fixed properly for drm-next/4.21 in: "drm/amd: Don't fail on backlight = 0" However, that patch depends on more extensive changes to the backlight interface which are too invasive for -fixes. Fixes: Bugzilla: https://bugs.freedesktop.org/108668 Signed-off-by: Alex Deucher <alexander.deucher@amd.com> --- drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 7 ------- 1 file changed, 7 deletions(-) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index b0df6dc9a775f..e224f23e22155 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -1524,13 +1524,6 @@ static int amdgpu_dm_backlight_update_status(struct backlight_device *bd) { struct amdgpu_display_manager *dm = bl_get_data(bd); - /* - * PWM interperts 0 as 100% rather than 0% because of HW - * limitation for level 0.So limiting minimum brightness level - * to 1. - */ - if (bd->props.brightness < 1) - return 1; if (dc_link_set_backlight_level(dm->backlight_link, bd->props.brightness, 0, 0)) return 0; From 8ed4ec32d5b1f04a641978322a38a8d7089553bb Mon Sep 17 00:00:00 2001 From: Shaokun Zhang <zhangshaokun@hisilicon.com> Date: Mon, 5 Nov 2018 18:33:35 +0800 Subject: [PATCH 213/443] drm/amd/display: Fix misleading buffer information RETIMER_REDRIVER_INFO shows the buffer as a decimal value with a '0x' prefix, which is somewhat misleading. Fix it to print hexadecimal, as was intended. Fixes: 2f14bc89("drm/amd/display: add retimer log for HWQ tuning use.") Cc: Charlene Liu <charlene.liu@amd.com> Cc: Dmytro Laktyushkin <Dmytro.Laktyushkin@amd.com> Signed-off-by: Shaokun Zhang <zhangshaokun@hisilicon.com> Reviewed-by: Leo Li <sunpeng.li@amd.com> Signed-off-by: Alex Deucher <alexander.deucher@amd.com> --- drivers/gpu/drm/amd/display/dc/core/dc_link.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_link.c b/drivers/gpu/drm/amd/display/dc/core/dc_link.c index fb04a4ad141fd..5da2186b3615f 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_link.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_link.c @@ -1722,7 +1722,7 @@ static void write_i2c_retimer_setting( i2c_success = i2c_write(pipe_ctx, slave_address, buffer, sizeof(buffer)); RETIMER_REDRIVER_INFO("retimer write to slave_address = 0x%x,\ - offset = 0x%d, reg_val = 0x%d, i2c_success = %d\n", + offset = 0x%x, reg_val = 0x%x, i2c_success = %d\n", slave_address, buffer[0], buffer[1], i2c_success?1:0); if (!i2c_success) /* Write failure */ @@ -1734,7 +1734,7 @@ static void write_i2c_retimer_setting( i2c_success = i2c_write(pipe_ctx, slave_address, buffer, sizeof(buffer)); RETIMER_REDRIVER_INFO("retimer write to slave_address = 0x%x,\ - offset = 0x%d, reg_val = 0x%d, i2c_success = %d\n", + offset = 0x%x, reg_val = 0x%x, i2c_success = %d\n", slave_address, buffer[0], buffer[1], i2c_success?1:0); if (!i2c_success) /* Write failure */ From 02680efbb10be0d2c867fe722ae23d588f6bebef Mon Sep 17 00:00:00 2001 From: Harry Wentland <harry.wentland@amd.com> Date: Sun, 7 Oct 2018 10:01:23 -0400 Subject: [PATCH 214/443] drm/amd/display: Stop leaking planes [Why] drm_plane_cleanup does not free the plane. [How] Call drm_primary_helper_destroy which will also free the plane. Signed-off-by: Harry Wentland <harry.wentland@amd.com> Acked-by: Alex Deucher <alexander.deucher@amd.com> Signed-off-by: Alex Deucher <alexander.deucher@amd.com> --- drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index e224f23e22155..289ddd52cac48 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -3301,7 +3301,7 @@ void dm_drm_plane_destroy_state(struct drm_plane *plane, static const struct drm_plane_funcs dm_plane_funcs = { .update_plane = drm_atomic_helper_update_plane, .disable_plane = drm_atomic_helper_disable_plane, - .destroy = drm_plane_cleanup, + .destroy = drm_primary_helper_destroy, .reset = dm_drm_plane_reset, .atomic_duplicate_state = dm_drm_plane_duplicate_state, .atomic_destroy_state = dm_drm_plane_destroy_state, From 3426d66d3e74ab0ab264a85e0795a17e3dde1e71 Mon Sep 17 00:00:00 2001 From: Alex Deucher <alexander.deucher@amd.com> Date: Tue, 6 Nov 2018 11:19:00 -0500 Subject: [PATCH 215/443] drm/amdgpu/vega20: add CLK base offset In case we need to access CLK registers. Acked-by: Harry Wentland <harry.wentland@amd.com> Signed-off-by: Alex Deucher <alexander.deucher@amd.com> --- drivers/gpu/drm/amd/amdgpu/vega20_reg_init.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/amd/amdgpu/vega20_reg_init.c b/drivers/gpu/drm/amd/amdgpu/vega20_reg_init.c index 2d4473557b0d2..d13fc4fcb5179 100644 --- a/drivers/gpu/drm/amd/amdgpu/vega20_reg_init.c +++ b/drivers/gpu/drm/amd/amdgpu/vega20_reg_init.c @@ -49,6 +49,7 @@ int vega20_reg_base_init(struct amdgpu_device *adev) adev->reg_offset[SMUIO_HWIP][i] = (uint32_t *)(&(SMUIO_BASE.instance[i])); adev->reg_offset[NBIF_HWIP][i] = (uint32_t *)(&(NBIO_BASE.instance[i])); adev->reg_offset[THM_HWIP][i] = (uint32_t *)(&(THM_BASE.instance[i])); + adev->reg_offset[CLK_HWIP][i] = (uint32_t *)(&(CLK_BASE.instance[i])); } return 0; } From 689e7b34234e29e5168894b27b752a4e16ef08c4 Mon Sep 17 00:00:00 2001 From: Alex Deucher <alexander.deucher@amd.com> Date: Fri, 2 Nov 2018 10:51:50 -0500 Subject: [PATCH 216/443] drm/amdgpu/display: check if fbc is available in set_static_screen_control (v2) The value is dependent on whether fbc is available. v2: only check if num_pipes is valid Reviewed-by: Harry Wentland <harry.wentland@amd.com> Signed-off-by: Alex Deucher <alexander.deucher@amd.com> --- .../gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c b/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c index b75ede5f84f76..b459867a05b20 100644 --- a/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c +++ b/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c @@ -1736,7 +1736,12 @@ static void set_static_screen_control(struct pipe_ctx **pipe_ctx, if (events->force_trigger) value |= 0x1; - value |= 0x84; + if (num_pipes) { + struct dc *dc = pipe_ctx[0]->stream->ctx->dc; + + if (dc->fbc_compressor) + value |= 0x84; + } for (i = 0; i < num_pipes; i++) pipe_ctx[i]->stream_res.tg->funcs-> From 7875a22625aa7f11befba84bd1e669201032947d Mon Sep 17 00:00:00 2001 From: Alex Deucher <alexander.deucher@amd.com> Date: Tue, 6 Nov 2018 14:44:29 -0500 Subject: [PATCH 217/443] drm/amdgpu: add DC feature mask module parameter Similar to ppfeaturemask. Allows you to selectively enable/disable DC features. Reviewed-by: Harry Wentland <harry.wentland@amd.com> Signed-off-by: Alex Deucher <alexander.deucher@amd.com> --- drivers/gpu/drm/amd/amdgpu/amdgpu.h | 1 + drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c | 11 +++++++++++ drivers/gpu/drm/amd/include/amd_shared.h | 4 ++++ 3 files changed, 16 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index d0102cfc8efbd..104b2e0d893bd 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -151,6 +151,7 @@ extern int amdgpu_compute_multipipe; extern int amdgpu_gpu_recovery; extern int amdgpu_emu_mode; extern uint amdgpu_smu_memory_pool_size; +extern uint amdgpu_dc_feature_mask; extern struct amdgpu_mgpu_info mgpu_info; #ifdef CONFIG_DRM_AMDGPU_SI diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c index 943dbf3c5da12..8de55f7f1a3a3 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c @@ -127,6 +127,9 @@ int amdgpu_compute_multipipe = -1; int amdgpu_gpu_recovery = -1; /* auto */ int amdgpu_emu_mode = 0; uint amdgpu_smu_memory_pool_size = 0; +/* FBC (bit 0) disabled by default*/ +uint amdgpu_dc_feature_mask = 0; + struct amdgpu_mgpu_info mgpu_info = { .mutex = __MUTEX_INITIALIZER(mgpu_info.mutex), }; @@ -631,6 +634,14 @@ module_param(halt_if_hws_hang, int, 0644); MODULE_PARM_DESC(halt_if_hws_hang, "Halt if HWS hang is detected (0 = off (default), 1 = on)"); #endif +/** + * DOC: dcfeaturemask (uint) + * Override display features enabled. See enum DC_FEATURE_MASK in drivers/gpu/drm/amd/include/amd_shared.h. + * The default is the current set of stable display features. + */ +MODULE_PARM_DESC(dcfeaturemask, "all stable DC features enabled (default))"); +module_param_named(dcfeaturemask, amdgpu_dc_feature_mask, uint, 0444); + static const struct pci_device_id pciidlist[] = { #ifdef CONFIG_DRM_AMDGPU_SI {0x1002, 0x6780, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_TAHITI}, diff --git a/drivers/gpu/drm/amd/include/amd_shared.h b/drivers/gpu/drm/amd/include/amd_shared.h index 2083c308007cd..470d7b89071a4 100644 --- a/drivers/gpu/drm/amd/include/amd_shared.h +++ b/drivers/gpu/drm/amd/include/amd_shared.h @@ -133,6 +133,10 @@ enum PP_FEATURE_MASK { PP_AVFS_MASK = 0x40000, }; +enum DC_FEATURE_MASK { + DC_FBC_MASK = 0x1, +}; + /** * struct amd_ip_funcs - general hooks for managing amdgpu IP Blocks */ From ce2127c462d9d1c0832f088b23158420e87e71a0 Mon Sep 17 00:00:00 2001 From: Alex Deucher <alexander.deucher@amd.com> Date: Tue, 6 Nov 2018 15:10:37 -0500 Subject: [PATCH 218/443] drm/amdgpu/display/dc: add FBC to dc_config Add FBC to the list of features that can be enabled from the DM. Reviewed-by: Harry Wentland <harry.wentland@amd.com> Signed-off-by: Alex Deucher <alexander.deucher@amd.com> --- drivers/gpu/drm/amd/display/dc/dc.h | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/amd/display/dc/dc.h b/drivers/gpu/drm/amd/display/dc/dc.h index 199527171100b..b57fa61b3034a 100644 --- a/drivers/gpu/drm/amd/display/dc/dc.h +++ b/drivers/gpu/drm/amd/display/dc/dc.h @@ -169,6 +169,7 @@ struct link_training_settings; struct dc_config { bool gpu_vm_support; bool disable_disp_pll_sharing; + bool fbc_support; }; enum visual_confirm { From 04b94af4e348acc52546b9b19c933575f26589a1 Mon Sep 17 00:00:00 2001 From: Alex Deucher <alexander.deucher@amd.com> Date: Tue, 6 Nov 2018 15:19:49 -0500 Subject: [PATCH 219/443] drm/amdgpu/display/dm: handle FBC dc feature parameter Set the dc_config properly when the option is enabled. Reviewed-by: Harry Wentland <harry.wentland@amd.com> Signed-off-by: Alex Deucher <alexander.deucher@amd.com> --- drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index 289ddd52cac48..d3f5cb1795bf8 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -429,6 +429,9 @@ static int amdgpu_dm_init(struct amdgpu_device *adev) adev->asic_type < CHIP_RAVEN) init_data.flags.gpu_vm_support = true; + if (amdgpu_dc_feature_mask & DC_FBC_MASK) + init_data.flags.fbc_support = true; + /* Display Core create. */ adev->dm.dc = dc_create(&init_data); From 5822e9539dc11721b53d74accd4c091b982011d0 Mon Sep 17 00:00:00 2001 From: Alex Deucher <alexander.deucher@amd.com> Date: Fri, 2 Nov 2018 10:54:27 -0500 Subject: [PATCH 220/443] drm/amdgpu/display/dce11: only enable FBC when selected Causes a black screen on a Stoney laptop. Bug: https://bugs.freedesktop.org/show_bug.cgi?id=108577 Reviewed-by: Harry Wentland <harry.wentland@amd.com> Signed-off-by: Alex Deucher <alexander.deucher@amd.com> --- drivers/gpu/drm/amd/display/dc/dce110/dce110_resource.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/dc/dce110/dce110_resource.c b/drivers/gpu/drm/amd/display/dc/dce110/dce110_resource.c index e3624ca245748..7c9fd9052ee23 100644 --- a/drivers/gpu/drm/amd/display/dc/dce110/dce110_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dce110/dce110_resource.c @@ -1362,7 +1362,8 @@ static bool construct( pool->base.sw_i2cs[i] = NULL; } - dc->fbc_compressor = dce110_compressor_create(ctx); + if (dc->config.fbc_support) + dc->fbc_compressor = dce110_compressor_create(ctx); if (!underlay_create(ctx, &pool->base)) goto res_create_fail; From a6758309a005060b8297a538a457c88699cb2520 Mon Sep 17 00:00:00 2001 From: Vasily Averin <vvs@virtuozzo.com> Date: Tue, 6 Nov 2018 16:49:50 -0500 Subject: [PATCH 221/443] ext4: avoid buffer leak on shutdown in ext4_mark_iloc_dirty() ext4_mark_iloc_dirty() callers expect that it releases iloc->bh even if it returns an error. Fixes: 0db1ff222d40 ("ext4: add shutdown bit and check for it") Signed-off-by: Vasily Averin <vvs@virtuozzo.com> Signed-off-by: Theodore Ts'o <tytso@mit.edu> Cc: stable@kernel.org # 4.11 --- fs/ext4/inode.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index c3d9a42c561ef..55c8fca76daff 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -5835,9 +5835,10 @@ int ext4_mark_iloc_dirty(handle_t *handle, { int err = 0; - if (unlikely(ext4_forced_shutdown(EXT4_SB(inode->i_sb)))) + if (unlikely(ext4_forced_shutdown(EXT4_SB(inode->i_sb)))) { + put_bh(iloc->bh); return -EIO; - + } if (IS_I_VERSION(inode)) inode_inc_iversion(inode); From feaf264ce7f8d54582e2f66eb82dd9dd124c94f3 Mon Sep 17 00:00:00 2001 From: Vasily Averin <vvs@virtuozzo.com> Date: Tue, 6 Nov 2018 17:01:36 -0500 Subject: [PATCH 222/443] ext4: avoid buffer leak in ext4_orphan_add() after prior errors Fixes: d745a8c20c1f ("ext4: reduce contention on s_orphan_lock") Fixes: 6e3617e579e0 ("ext4: Handle non empty on-disk orphan link") Cc: Dmitry Monakhov <dmonakhov@gmail.com> Signed-off-by: Vasily Averin <vvs@virtuozzo.com> Signed-off-by: Theodore Ts'o <tytso@mit.edu> Cc: stable@kernel.org # 2.6.34 --- fs/ext4/namei.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c index 67a38532032ae..d388cce72db20 100644 --- a/fs/ext4/namei.c +++ b/fs/ext4/namei.c @@ -2811,7 +2811,9 @@ int ext4_orphan_add(handle_t *handle, struct inode *inode) list_del_init(&EXT4_I(inode)->i_orphan); mutex_unlock(&sbi->s_orphan_lock); } - } + } else + brelse(iloc.bh); + jbd_debug(4, "superblock will point to %lu\n", inode->i_ino); jbd_debug(4, "orphan inode %lu will point to %d\n", inode->i_ino, NEXT_ORPHAN(inode)); From 4f32c38b4662312dd3c5f113d8bdd459887fb773 Mon Sep 17 00:00:00 2001 From: Theodore Ts'o <tytso@mit.edu> Date: Tue, 6 Nov 2018 17:18:17 -0500 Subject: [PATCH 223/443] ext4: avoid possible double brelse() in add_new_gdb() on error path Fixes: b40971426a83 ("ext4: add error checking to calls to ...") Reported-by: Vasily Averin <vvs@virtuozzo.com> Signed-off-by: Theodore Ts'o <tytso@mit.edu> Cc: stable@kernel.org # 2.6.38 --- fs/ext4/resize.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/ext4/resize.c b/fs/ext4/resize.c index 85158e9de7c23..a5efee34415fe 100644 --- a/fs/ext4/resize.c +++ b/fs/ext4/resize.c @@ -871,6 +871,7 @@ static int add_new_gdb(handle_t *handle, struct inode *inode, err = ext4_handle_dirty_metadata(handle, NULL, gdb_bh); if (unlikely(err)) { ext4_std_error(sb, err); + iloc.bh = NULL; goto exit_inode; } brelse(dind); From 1bfc204dc0e7a690ab8440e91bb7d1a324320fdc Mon Sep 17 00:00:00 2001 From: Vasily Averin <vvs@virtuozzo.com> Date: Tue, 6 Nov 2018 17:45:02 -0500 Subject: [PATCH 224/443] ext4: remove unneeded brelse call in ext4_xattr_inode_update_ref() Signed-off-by: Vasily Averin <vvs@virtuozzo.com> Signed-off-by: Theodore Ts'o <tytso@mit.edu> --- fs/ext4/xattr.c | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c index f36fc5d5b2574..dc1aeab06dbad 100644 --- a/fs/ext4/xattr.c +++ b/fs/ext4/xattr.c @@ -1031,10 +1031,8 @@ static int ext4_xattr_inode_update_ref(handle_t *handle, struct inode *ea_inode, inode_lock(ea_inode); ret = ext4_reserve_inode_write(handle, ea_inode, &iloc); - if (ret) { - iloc.bh = NULL; + if (ret) goto out; - } ref_count = ext4_xattr_inode_get_ref(ea_inode); ref_count += ref_change; @@ -1080,12 +1078,10 @@ static int ext4_xattr_inode_update_ref(handle_t *handle, struct inode *ea_inode, } ret = ext4_mark_iloc_dirty(handle, ea_inode, &iloc); - iloc.bh = NULL; if (ret) ext4_warning_inode(ea_inode, "ext4_mark_iloc_dirty() failed ret=%d", ret); out: - brelse(iloc.bh); inode_unlock(ea_inode); return ret; } From aca49ee041cbdd329c55d4dbcd6b3d4b9af240e4 Mon Sep 17 00:00:00 2001 From: "Martin K. Petersen" <martin.petersen@oracle.com> Date: Mon, 5 Nov 2018 22:49:47 -0500 Subject: [PATCH 225/443] Revert "scsi: ufs: Disable blk-mq for now" This reverts commit d87161bea405e3260377026ca8a704a3f68bd67a. The issues that forced us to disable blk_mq for ufs have been resolved. Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com> --- drivers/scsi/ufs/ufshcd.c | 7 ------- 1 file changed, 7 deletions(-) diff --git a/drivers/scsi/ufs/ufshcd.c b/drivers/scsi/ufs/ufshcd.c index 23d7cca36ff03..27db55b0ca7f8 100644 --- a/drivers/scsi/ufs/ufshcd.c +++ b/drivers/scsi/ufs/ufshcd.c @@ -8099,13 +8099,6 @@ int ufshcd_alloc_host(struct device *dev, struct ufs_hba **hba_handle) err = -ENOMEM; goto out_error; } - - /* - * Do not use blk-mq at this time because blk-mq does not support - * runtime pm. - */ - host->use_blk_mq = false; - hba = shost_priv(host); hba->host = host; hba->dev = dev; From f635e48e866ee1a47d2d42ce012fdcc07bf55853 Mon Sep 17 00:00:00 2001 From: Quinn Tran <quinn.tran@cavium.com> Date: Tue, 6 Nov 2018 00:51:21 -0800 Subject: [PATCH 226/443] scsi: qla2xxx: Initialize port speed to avoid setting lower speed This patch initializes port speed so that firmware does not set lower operating speed. Setting lower speed in firmware impacts WRITE perfomance. Fixes: 726b85487067 ("qla2xxx: Add framework for async fabric discovery") Cc: <stable@vger.kernel.org> Signed-off-by: Quinn Tran <quinn.tran@cavium.com> Signed-off-by: Himanshu Madhani <himanshu.madhani@cavium.com> Tested-by: Laurence Oberman <loberman@redhat.com> Reviewed-by: Ewan D. Milne <emilne@redhat.com> Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com> --- drivers/scsi/qla2xxx/qla_init.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/scsi/qla2xxx/qla_init.c b/drivers/scsi/qla2xxx/qla_init.c index 6fe20c27acc16..eb59c796a795d 100644 --- a/drivers/scsi/qla2xxx/qla_init.c +++ b/drivers/scsi/qla2xxx/qla_init.c @@ -4763,6 +4763,7 @@ qla2x00_alloc_fcport(scsi_qla_host_t *vha, gfp_t flags) fcport->loop_id = FC_NO_LOOP_ID; qla2x00_set_fcport_state(fcport, FCS_UNCONFIGURED); fcport->supported_classes = FC_COS_UNSPECIFIED; + fcport->fp_speed = PORT_SPEED_UNKNOWN; fcport->ct_desc.ct_sns = dma_alloc_coherent(&vha->hw->pdev->dev, sizeof(struct ct_sns_pkt), &fcport->ct_desc.ct_sns_dma, From 28c5bcf74fa07c25d5bd118d1271920f51ce2a98 Mon Sep 17 00:00:00 2001 From: Scott Wood <oss@buserror.net> Date: Tue, 6 Nov 2018 19:49:34 -0600 Subject: [PATCH 227/443] KVM: PPC: Move and undef TRACE_INCLUDE_PATH/FILE TRACE_INCLUDE_PATH and TRACE_INCLUDE_FILE are used by <trace/define_trace.h>, so like that #include, they should be outside #ifdef protection. They also need to be #undefed before defining, in case multiple trace headers are included by the same C file. This became the case on book3e after commit cf4a6085151a ("powerpc/mm: Add missing tracepoint for tlbie"), leading to the following build error: CC arch/powerpc/kvm/powerpc.o In file included from arch/powerpc/kvm/powerpc.c:51:0: arch/powerpc/kvm/trace.h:9:0: error: "TRACE_INCLUDE_PATH" redefined [-Werror] #define TRACE_INCLUDE_PATH . ^ In file included from arch/powerpc/kvm/../mm/mmu_decl.h:25:0, from arch/powerpc/kvm/powerpc.c:48: ./arch/powerpc/include/asm/trace.h:224:0: note: this is the location of the previous definition #define TRACE_INCLUDE_PATH asm ^ cc1: all warnings being treated as errors Reported-by: Christian Zigotzky <chzigotzky@xenosoft.de> Signed-off-by: Scott Wood <oss@buserror.net> Signed-off-by: Michael Ellerman <mpe@ellerman.id.au> --- arch/powerpc/kvm/trace.h | 8 ++++++-- arch/powerpc/kvm/trace_booke.h | 9 +++++++-- arch/powerpc/kvm/trace_hv.h | 9 +++++++-- arch/powerpc/kvm/trace_pr.h | 9 +++++++-- 4 files changed, 27 insertions(+), 8 deletions(-) diff --git a/arch/powerpc/kvm/trace.h b/arch/powerpc/kvm/trace.h index 491b0f715d6bc..ea1d7c8083190 100644 --- a/arch/powerpc/kvm/trace.h +++ b/arch/powerpc/kvm/trace.h @@ -6,8 +6,6 @@ #undef TRACE_SYSTEM #define TRACE_SYSTEM kvm -#define TRACE_INCLUDE_PATH . -#define TRACE_INCLUDE_FILE trace /* * Tracepoint for guest mode entry. @@ -120,4 +118,10 @@ TRACE_EVENT(kvm_check_requests, #endif /* _TRACE_KVM_H */ /* This part must be outside protection */ +#undef TRACE_INCLUDE_PATH +#undef TRACE_INCLUDE_FILE + +#define TRACE_INCLUDE_PATH . +#define TRACE_INCLUDE_FILE trace + #include <trace/define_trace.h> diff --git a/arch/powerpc/kvm/trace_booke.h b/arch/powerpc/kvm/trace_booke.h index ac640e81fdc5f..3837842986aa4 100644 --- a/arch/powerpc/kvm/trace_booke.h +++ b/arch/powerpc/kvm/trace_booke.h @@ -6,8 +6,6 @@ #undef TRACE_SYSTEM #define TRACE_SYSTEM kvm_booke -#define TRACE_INCLUDE_PATH . -#define TRACE_INCLUDE_FILE trace_booke #define kvm_trace_symbol_exit \ {0, "CRITICAL"}, \ @@ -218,4 +216,11 @@ TRACE_EVENT(kvm_booke_queue_irqprio, #endif /* This part must be outside protection */ + +#undef TRACE_INCLUDE_PATH +#undef TRACE_INCLUDE_FILE + +#define TRACE_INCLUDE_PATH . +#define TRACE_INCLUDE_FILE trace_booke + #include <trace/define_trace.h> diff --git a/arch/powerpc/kvm/trace_hv.h b/arch/powerpc/kvm/trace_hv.h index bcfe8a987f6a9..8a1e3b0047f19 100644 --- a/arch/powerpc/kvm/trace_hv.h +++ b/arch/powerpc/kvm/trace_hv.h @@ -9,8 +9,6 @@ #undef TRACE_SYSTEM #define TRACE_SYSTEM kvm_hv -#define TRACE_INCLUDE_PATH . -#define TRACE_INCLUDE_FILE trace_hv #define kvm_trace_symbol_hcall \ {H_REMOVE, "H_REMOVE"}, \ @@ -497,4 +495,11 @@ TRACE_EVENT(kvmppc_run_vcpu_exit, #endif /* _TRACE_KVM_HV_H */ /* This part must be outside protection */ + +#undef TRACE_INCLUDE_PATH +#undef TRACE_INCLUDE_FILE + +#define TRACE_INCLUDE_PATH . +#define TRACE_INCLUDE_FILE trace_hv + #include <trace/define_trace.h> diff --git a/arch/powerpc/kvm/trace_pr.h b/arch/powerpc/kvm/trace_pr.h index 2f9a8829552b9..46a46d328fbf2 100644 --- a/arch/powerpc/kvm/trace_pr.h +++ b/arch/powerpc/kvm/trace_pr.h @@ -8,8 +8,6 @@ #undef TRACE_SYSTEM #define TRACE_SYSTEM kvm_pr -#define TRACE_INCLUDE_PATH . -#define TRACE_INCLUDE_FILE trace_pr TRACE_EVENT(kvm_book3s_reenter, TP_PROTO(int r, struct kvm_vcpu *vcpu), @@ -257,4 +255,11 @@ TRACE_EVENT(kvm_exit, #endif /* _TRACE_KVM_H */ /* This part must be outside protection */ + +#undef TRACE_INCLUDE_PATH +#undef TRACE_INCLUDE_FILE + +#define TRACE_INCLUDE_PATH . +#define TRACE_INCLUDE_FILE trace_pr + #include <trace/define_trace.h> From 8d72ee3266f08fd3490011ee7b95ffc31e66fd6d Mon Sep 17 00:00:00 2001 From: Viresh Kumar <viresh.kumar@linaro.org> Date: Mon, 29 Oct 2018 12:47:40 +0530 Subject: [PATCH 228/443] Documentation: cpu-freq: Frequencies aren't always sorted The order in which the frequencies are displayed in cpufreq stats depends on the order in which the frequencies were sorted in the frequency table provided to cpufreq core by the cpufreq driver. They can be completely unsorted as well. The documentation's claim that the stats will be sorted in descending order is hence incorrect and here is an attempt to fix it. Reported-by: Pavel <pavel2000@ngs.ru> Signed-off-by: Viresh Kumar <viresh.kumar@linaro.org> Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com> --- Documentation/cpu-freq/cpufreq-stats.txt | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/Documentation/cpu-freq/cpufreq-stats.txt b/Documentation/cpu-freq/cpufreq-stats.txt index a873855c811d6..14378cecb1723 100644 --- a/Documentation/cpu-freq/cpufreq-stats.txt +++ b/Documentation/cpu-freq/cpufreq-stats.txt @@ -86,9 +86,11 @@ transitions. This will give a fine grained information about all the CPU frequency transitions. The cat output here is a two dimensional matrix, where an entry <i,j> (row i, column j) represents the count of number of transitions from -Freq_i to Freq_j. Freq_i is in descending order with increasing rows and -Freq_j is in descending order with increasing columns. The output here also -contains the actual freq values for each row and column for better readability. +Freq_i to Freq_j. Freq_i rows and Freq_j columns follow the sorting order in +which the driver has provided the frequency table initially to the cpufreq core +and so can be sorted (ascending or descending) or unsorted. The output here +also contains the actual freq values for each row and column for better +readability. If the transition table is bigger than PAGE_SIZE, reading this will return an -EFBIG error. From 6ef28a04d1ccf718eee069b72132ce4aa1e52ab9 Mon Sep 17 00:00:00 2001 From: Anson Huang <anson.huang@nxp.com> Date: Mon, 5 Nov 2018 00:59:28 +0000 Subject: [PATCH 229/443] cpufreq: imx6q: add return value check for voltage scale Add return value check for voltage scale when ARM clock rate change fail. Signed-off-by: Anson Huang <Anson.Huang@nxp.com> Acked-by: Viresh Kumar <viresh.kumar@linaro.org> Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com> --- drivers/cpufreq/imx6q-cpufreq.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/cpufreq/imx6q-cpufreq.c b/drivers/cpufreq/imx6q-cpufreq.c index 8cfee0ab804b4..d8c3595e90236 100644 --- a/drivers/cpufreq/imx6q-cpufreq.c +++ b/drivers/cpufreq/imx6q-cpufreq.c @@ -160,8 +160,13 @@ static int imx6q_set_target(struct cpufreq_policy *policy, unsigned int index) /* Ensure the arm clock divider is what we expect */ ret = clk_set_rate(clks[ARM].clk, new_freq * 1000); if (ret) { + int ret1; + dev_err(cpu_dev, "failed to set clock rate: %d\n", ret); - regulator_set_voltage_tol(arm_reg, volt_old, 0); + ret1 = regulator_set_voltage_tol(arm_reg, volt_old, 0); + if (ret1) + dev_warn(cpu_dev, + "failed to restore vddarm voltage: %d\n", ret1); return ret; } From e531efa1e92b888acce45785b3ae69278fa712c0 Mon Sep 17 00:00:00 2001 From: Zhao Wei Liew <zhaoweiliew@gmail.com> Date: Thu, 1 Nov 2018 01:32:46 +0000 Subject: [PATCH 230/443] Documentation: cpufreq: Correct a typo Fix a typo in the admin-guide documentation for cpufreq. Signed-off-by: Zhao Wei Liew <zhaoweiliew@gmail.com> Acked-by: Viresh Kumar <viresh.kumar@linaro.org> Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com> --- Documentation/admin-guide/pm/cpufreq.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Documentation/admin-guide/pm/cpufreq.rst b/Documentation/admin-guide/pm/cpufreq.rst index 47153e64dfb53..7eca9026a9ed2 100644 --- a/Documentation/admin-guide/pm/cpufreq.rst +++ b/Documentation/admin-guide/pm/cpufreq.rst @@ -150,7 +150,7 @@ data structures necessary to handle the given policy and, possibly, to add a governor ``sysfs`` interface to it. Next, the governor is started by invoking its ``->start()`` callback. -That callback it expected to register per-CPU utilization update callbacks for +That callback is expected to register per-CPU utilization update callbacks for all of the online CPUs belonging to the given policy with the CPU scheduler. The utilization update callbacks will be invoked by the CPU scheduler on important events, like task enqueue and dequeue, on every iteration of the From 9e463084cdb22e0b56b2dfbc50461020409a5fd3 Mon Sep 17 00:00:00 2001 From: Theodore Ts'o <tytso@mit.edu> Date: Wed, 7 Nov 2018 10:32:53 -0500 Subject: [PATCH 231/443] ext4: fix possible leak of sbi->s_group_desc_leak in error path Fixes: bfe0a5f47ada ("ext4: add more mount time checks of the superblock") Reported-by: Vasily Averin <vvs@virtuozzo.com> Signed-off-by: Theodore Ts'o <tytso@mit.edu> Cc: stable@kernel.org # 4.18 --- fs/ext4/super.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/fs/ext4/super.c b/fs/ext4/super.c index a221f1cdf7046..92092b55db1ec 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -4075,6 +4075,14 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) sbi->s_groups_count = blocks_count; sbi->s_blockfile_groups = min_t(ext4_group_t, sbi->s_groups_count, (EXT4_MAX_BLOCK_FILE_PHYS / EXT4_BLOCKS_PER_GROUP(sb))); + if (((u64)sbi->s_groups_count * sbi->s_inodes_per_group) != + le32_to_cpu(es->s_inodes_count)) { + ext4_msg(sb, KERN_ERR, "inodes count not valid: %u vs %llu", + le32_to_cpu(es->s_inodes_count), + ((u64)sbi->s_groups_count * sbi->s_inodes_per_group)); + ret = -EINVAL; + goto failed_mount; + } db_count = (sbi->s_groups_count + EXT4_DESC_PER_BLOCK(sb) - 1) / EXT4_DESC_PER_BLOCK(sb); if (ext4_has_feature_meta_bg(sb)) { @@ -4094,14 +4102,6 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) ret = -ENOMEM; goto failed_mount; } - if (((u64)sbi->s_groups_count * sbi->s_inodes_per_group) != - le32_to_cpu(es->s_inodes_count)) { - ext4_msg(sb, KERN_ERR, "inodes count not valid: %u vs %llu", - le32_to_cpu(es->s_inodes_count), - ((u64)sbi->s_groups_count * sbi->s_inodes_per_group)); - ret = -EINVAL; - goto failed_mount; - } bgl_lock_init(sbi->s_blockgroup_lock); From f26621e60b35369bca9228bc936dc723b3e421af Mon Sep 17 00:00:00 2001 From: Borislav Petkov <bp@suse.de> Date: Mon, 5 Nov 2018 10:33:07 +0100 Subject: [PATCH 232/443] resource/docs: Complete kernel-doc style function documentation Add the missing kernel-doc style function parameters documentation. Signed-off-by: Borislav Petkov <bp@suse.de> Cc: Linus Torvalds <torvalds@linux-foundation.org> Cc: Peter Zijlstra <peterz@infradead.org> Cc: Thomas Gleixner <tglx@linutronix.de> Cc: akpm@linux-foundation.org Cc: linux-tip-commits@vger.kernel.org Cc: rdunlap@infradead.org Fixes: b69c2e20f6e4 ("resource: Clean it up a bit") Link: http://lkml.kernel.org/r/20181105093307.GA12445@zn.tnic Signed-off-by: Ingo Molnar <mingo@kernel.org> --- kernel/resource.c | 23 ++++++++++++++--------- 1 file changed, 14 insertions(+), 9 deletions(-) diff --git a/kernel/resource.c b/kernel/resource.c index 17bcb189d5306..b0fbf685c77a5 100644 --- a/kernel/resource.c +++ b/kernel/resource.c @@ -318,17 +318,24 @@ int release_resource(struct resource *old) EXPORT_SYMBOL(release_resource); -/* - * Finds the lowest iomem resource that covers part of [start..end]. The - * caller must specify start, end, flags, and desc (which may be +/** + * Finds the lowest iomem resource that covers part of [@start..@end]. The + * caller must specify @start, @end, @flags, and @desc (which may be * IORES_DESC_NONE). * - * If a resource is found, returns 0 and *res is overwritten with the part - * of the resource that's within [start..end]; if none is found, returns - * -1. Returns -EINVAL for other invalid parameters. + * If a resource is found, returns 0 and @*res is overwritten with the part + * of the resource that's within [@start..@end]; if none is found, returns + * -1 or -EINVAL for other invalid parameters. * * This function walks the whole tree and not just first level children * unless @first_lvl is true. + * + * @start: start address of the resource searched for + * @end: end address of same resource + * @flags: flags which the resource must have + * @desc: descriptor the resource must have + * @first_lvl: walk only the first level children, if set + * @res: return ptr, if resource found */ static int find_next_iomem_res(resource_size_t start, resource_size_t end, unsigned long flags, unsigned long desc, @@ -390,9 +397,7 @@ static int __walk_iomem_res_desc(resource_size_t start, resource_size_t end, } /** - * walk_iomem_res_desc - walk through iomem resources - * - * Walks through iomem resources and calls @func() with matching resource + * Walks through iomem resources and calls func() with matching resource * ranges. This walks through whole tree and not just first level children. * All the memory ranges which overlap start,end and also match flags and * desc are valid candidates. From af18e35bfd01e6d65a5e3ef84ffe8b252d1628c5 Mon Sep 17 00:00:00 2001 From: Vasily Averin <vvs@virtuozzo.com> Date: Wed, 7 Nov 2018 10:56:28 -0500 Subject: [PATCH 233/443] ext4: fix possible leak of s_journal_flag_rwsem in error path Fixes: c8585c6fcaf2 ("ext4: fix races between changing inode journal ...") Signed-off-by: Vasily Averin <vvs@virtuozzo.com> Signed-off-by: Theodore Ts'o <tytso@mit.edu> Cc: stable@kernel.org # 4.7 --- fs/ext4/super.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 92092b55db1ec..53ff6c2a26ed9 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -4510,6 +4510,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) percpu_counter_destroy(&sbi->s_freeinodes_counter); percpu_counter_destroy(&sbi->s_dirs_counter); percpu_counter_destroy(&sbi->s_dirtyclusters_counter); + percpu_free_rwsem(&sbi->s_journal_flag_rwsem); failed_mount5: ext4_ext_release(sb); ext4_release_system_zone(sb); From ecaaf408478b6fb4d9986f9b6652f3824e374f4c Mon Sep 17 00:00:00 2001 From: Vasily Averin <vvs@virtuozzo.com> Date: Wed, 7 Nov 2018 11:01:33 -0500 Subject: [PATCH 234/443] ext4: fix buffer leak in ext4_xattr_get_block() on error path Fixes: dec214d00e0d ("ext4: xattr inode deduplication") Signed-off-by: Vasily Averin <vvs@virtuozzo.com> Signed-off-by: Theodore Ts'o <tytso@mit.edu> Cc: stable@kernel.org # 4.13 --- fs/ext4/xattr.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c index dc1aeab06dbad..07c3a115f7ae9 100644 --- a/fs/ext4/xattr.c +++ b/fs/ext4/xattr.c @@ -2272,8 +2272,10 @@ static struct buffer_head *ext4_xattr_get_block(struct inode *inode) if (!bh) return ERR_PTR(-EIO); error = ext4_xattr_check_block(inode, bh); - if (error) + if (error) { + brelse(bh); return ERR_PTR(error); + } return bh; } From 45ae932d246f721e6584430017176cbcadfde610 Mon Sep 17 00:00:00 2001 From: Vasily Averin <vvs@virtuozzo.com> Date: Wed, 7 Nov 2018 11:07:01 -0500 Subject: [PATCH 235/443] ext4: release bs.bh before re-using in ext4_xattr_block_find() bs.bh was taken in previous ext4_xattr_block_find() call, it should be released before re-using Fixes: 7e01c8e5420b ("ext3/4: fix uninitialized bs in ...") Signed-off-by: Vasily Averin <vvs@virtuozzo.com> Signed-off-by: Theodore Ts'o <tytso@mit.edu> Cc: stable@kernel.org # 2.6.26 --- fs/ext4/xattr.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c index 07c3a115f7ae9..07b9a335c8eb4 100644 --- a/fs/ext4/xattr.c +++ b/fs/ext4/xattr.c @@ -2395,6 +2395,8 @@ ext4_xattr_set_handle(handle_t *handle, struct inode *inode, int name_index, error = ext4_xattr_block_set(handle, inode, &i, &bs); } else if (error == -ENOSPC) { if (EXT4_I(inode)->i_file_acl && !bs.s.base) { + brelse(bs.bh); + bs.bh = NULL; error = ext4_xattr_block_find(inode, &i, &bs); if (error) goto cleanup; From 6bdc9977fcdedf47118d2caf7270a19f4b6d8a8f Mon Sep 17 00:00:00 2001 From: Vasily Averin <vvs@virtuozzo.com> Date: Wed, 7 Nov 2018 11:10:21 -0500 Subject: [PATCH 236/443] ext4: fix buffer leak in ext4_xattr_move_to_block() on error path Fixes: 3f2571c1f91f ("ext4: factor out xattr moving") Fixes: 6dd4ee7cab7e ("ext4: Expand extra_inodes space per ...") Reviewed-by: Jan Kara <jack@suse.cz> Signed-off-by: Vasily Averin <vvs@virtuozzo.com> Signed-off-by: Theodore Ts'o <tytso@mit.edu> Cc: stable@kernel.org # 2.6.23 --- fs/ext4/xattr.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c index 07b9a335c8eb4..5c9bc0d85cc0a 100644 --- a/fs/ext4/xattr.c +++ b/fs/ext4/xattr.c @@ -2617,6 +2617,8 @@ static int ext4_xattr_move_to_block(handle_t *handle, struct inode *inode, kfree(buffer); if (is) brelse(is->iloc.bh); + if (bs) + brelse(bs->bh); kfree(is); kfree(bs); From fbb974ba693bbfb4e24a62181ef16d4e45febc37 Mon Sep 17 00:00:00 2001 From: Hans de Goede <hdegoede@redhat.com> Date: Tue, 4 Sep 2018 16:51:29 +0200 Subject: [PATCH 237/443] rtc: cmos: Do not export alarm rtc_ops when we do not support alarms When there is no IRQ configured for the RTC, the rtc-cmos code does not support alarms, all alarm rtc_ops fail with -EIO / -EINVAL. The rtc-core expects a rtc driver which does not support rtc alarms to not have alarm ops at all. Otherwise the wakealarm sysfs attr will read as empty rather then returning an error, making it impossible for userspace to find out beforehand if alarms are supported. A system without an IRQ for the RTC before this patch: [root@localhost ~]# cat /sys/class/rtc/rtc0/wakealarm [root@localhost ~]# After this patch: [root@localhost ~]# cat /sys/class/rtc/rtc0/wakealarm cat: /sys/class/rtc/rtc0/wakealarm: No such file or directory [root@localhost ~]# This fixes gnome-session + systemd trying to use suspend-then-hibernate, which causes systemd to abort the suspend when writing the RTC alarm fails. BugLink: https://github.com/systemd/systemd/issues/9988 Signed-off-by: Hans de Goede <hdegoede@redhat.com> Signed-off-by: Alexandre Belloni <alexandre.belloni@bootlin.com> --- drivers/rtc/rtc-cmos.c | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/drivers/rtc/rtc-cmos.c b/drivers/rtc/rtc-cmos.c index df0c5776d49bb..a5a19ff105354 100644 --- a/drivers/rtc/rtc-cmos.c +++ b/drivers/rtc/rtc-cmos.c @@ -257,6 +257,7 @@ static int cmos_read_alarm(struct device *dev, struct rtc_wkalrm *t) struct cmos_rtc *cmos = dev_get_drvdata(dev); unsigned char rtc_control; + /* This not only a rtc_op, but also called directly */ if (!is_valid_irq(cmos->irq)) return -EIO; @@ -452,6 +453,7 @@ static int cmos_set_alarm(struct device *dev, struct rtc_wkalrm *t) unsigned char mon, mday, hrs, min, sec, rtc_control; int ret; + /* This not only a rtc_op, but also called directly */ if (!is_valid_irq(cmos->irq)) return -EIO; @@ -516,9 +518,6 @@ static int cmos_alarm_irq_enable(struct device *dev, unsigned int enabled) struct cmos_rtc *cmos = dev_get_drvdata(dev); unsigned long flags; - if (!is_valid_irq(cmos->irq)) - return -EINVAL; - spin_lock_irqsave(&rtc_lock, flags); if (enabled) @@ -579,6 +578,12 @@ static const struct rtc_class_ops cmos_rtc_ops = { .alarm_irq_enable = cmos_alarm_irq_enable, }; +static const struct rtc_class_ops cmos_rtc_ops_no_alarm = { + .read_time = cmos_read_time, + .set_time = cmos_set_time, + .proc = cmos_procfs, +}; + /*----------------------------------------------------------------*/ /* @@ -855,9 +860,12 @@ cmos_do_probe(struct device *dev, struct resource *ports, int rtc_irq) dev_dbg(dev, "IRQ %d is already in use\n", rtc_irq); goto cleanup1; } + + cmos_rtc.rtc->ops = &cmos_rtc_ops; + } else { + cmos_rtc.rtc->ops = &cmos_rtc_ops_no_alarm; } - cmos_rtc.rtc->ops = &cmos_rtc_ops; cmos_rtc.rtc->nvram_old_abi = true; retval = rtc_register_device(cmos_rtc.rtc); if (retval) From 7ce9a992ffde8ce93d5ae5767362a5c7389ae895 Mon Sep 17 00:00:00 2001 From: "Maciej W. Rozycki" <macro@linux-mips.org> Date: Mon, 5 Nov 2018 03:48:25 +0000 Subject: [PATCH 238/443] rtc: hctosys: Add missing range error reporting Fix an issue with the 32-bit range error path in `rtc_hctosys' where no error code is set and consequently the successful preceding call result from `rtc_read_time' is propagated to `rtc_hctosys_ret'. This in turn makes any subsequent call to `hctosys_show' incorrectly report in sysfs that the system time has been set from this RTC while it has not. Set the error to ERANGE then if we can't express the result due to an overflow. Signed-off-by: Maciej W. Rozycki <macro@linux-mips.org> Fixes: b3a5ac42ab18 ("rtc: hctosys: Ensure system time doesn't overflow time_t") Cc: stable@vger.kernel.org # 4.17+ Signed-off-by: Alexandre Belloni <alexandre.belloni@bootlin.com> --- drivers/rtc/hctosys.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/rtc/hctosys.c b/drivers/rtc/hctosys.c index e79f2a181ad24..b9ec4a16db1f6 100644 --- a/drivers/rtc/hctosys.c +++ b/drivers/rtc/hctosys.c @@ -50,8 +50,10 @@ static int __init rtc_hctosys(void) tv64.tv_sec = rtc_tm_to_time64(&tm); #if BITS_PER_LONG == 32 - if (tv64.tv_sec > INT_MAX) + if (tv64.tv_sec > INT_MAX) { + err = -ERANGE; goto err_read; + } #endif err = do_settimeofday64(&tv64); From 9bde0afb7a906f1dabdba37162551565740b862d Mon Sep 17 00:00:00 2001 From: Xulin Sun <xulin.sun@windriver.com> Date: Tue, 6 Nov 2018 16:42:19 +0800 Subject: [PATCH 239/443] rtc: pcf2127: fix a kmemleak caused in pcf2127_i2c_gather_write pcf2127_i2c_gather_write() allocates memory as local variable for i2c_master_send(), after finishing the master transfer, the allocated memory should be freed. The kmemleak is reported: unreferenced object 0xffff80231e7dba80 (size 64): comm "hwclock", pid 27762, jiffies 4296880075 (age 356.944s) hex dump (first 32 bytes): 03 00 12 03 19 02 11 13 00 80 98 18 00 00 ff ff ................ 00 50 00 00 00 00 00 00 02 00 00 00 00 00 00 00 .P.............. backtrace: [<ffff000008221398>] create_object+0xf8/0x278 [<ffff000008a96264>] kmemleak_alloc+0x74/0xa0 [<ffff00000821070c>] __kmalloc+0x1ac/0x348 [<ffff0000087ed1dc>] pcf2127_i2c_gather_write+0x54/0xf8 [<ffff0000085fd9d4>] _regmap_raw_write+0x464/0x850 [<ffff0000085fe3f4>] regmap_bulk_write+0x1a4/0x348 [<ffff0000087ed32c>] pcf2127_rtc_set_time+0xac/0xe8 [<ffff0000087eaad8>] rtc_set_time+0x80/0x138 [<ffff0000087ebfb0>] rtc_dev_ioctl+0x398/0x610 [<ffff00000823f2c0>] do_vfs_ioctl+0xb0/0x848 [<ffff00000823fae4>] SyS_ioctl+0x8c/0xa8 [<ffff000008083ac0>] el0_svc_naked+0x34/0x38 [<ffffffffffffffff>] 0xffffffffffffffff Signed-off-by: Xulin Sun <xulin.sun@windriver.com> Signed-off-by: Alexandre Belloni <alexandre.belloni@bootlin.com> --- drivers/rtc/rtc-pcf2127.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/rtc/rtc-pcf2127.c b/drivers/rtc/rtc-pcf2127.c index 9f99a0966550b..7cb786d76e3c1 100644 --- a/drivers/rtc/rtc-pcf2127.c +++ b/drivers/rtc/rtc-pcf2127.c @@ -303,6 +303,9 @@ static int pcf2127_i2c_gather_write(void *context, memcpy(buf + 1, val, val_size); ret = i2c_master_send(client, buf, val_size + 1); + + kfree(buf); + if (ret != val_size + 1) return ret < 0 ? ret : -EIO; From 53692ec074d00589c2cf1d6d17ca76ad0adce6ec Mon Sep 17 00:00:00 2001 From: Vasily Averin <vvs@virtuozzo.com> Date: Wed, 7 Nov 2018 11:14:35 -0500 Subject: [PATCH 240/443] ext4: fix buffer leak in ext4_expand_extra_isize_ea() on error path Fixes: de05ca852679 ("ext4: move call to ext4_error() into ...") Signed-off-by: Vasily Averin <vvs@virtuozzo.com> Signed-off-by: Theodore Ts'o <tytso@mit.edu> Cc: stable@kernel.org # 4.17 --- fs/ext4/xattr.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c index 5c9bc0d85cc0a..0b96886835262 100644 --- a/fs/ext4/xattr.c +++ b/fs/ext4/xattr.c @@ -2698,7 +2698,6 @@ int ext4_expand_extra_isize_ea(struct inode *inode, int new_extra_isize, struct ext4_inode *raw_inode, handle_t *handle) { struct ext4_xattr_ibody_header *header; - struct buffer_head *bh; struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); static unsigned int mnt_count; size_t min_offs; @@ -2739,13 +2738,17 @@ int ext4_expand_extra_isize_ea(struct inode *inode, int new_extra_isize, * EA block can hold new_extra_isize bytes. */ if (EXT4_I(inode)->i_file_acl) { + struct buffer_head *bh; + bh = sb_bread(inode->i_sb, EXT4_I(inode)->i_file_acl); error = -EIO; if (!bh) goto cleanup; error = ext4_xattr_check_block(inode, bh); - if (error) + if (error) { + brelse(bh); goto cleanup; + } base = BHDR(bh); end = bh->b_data + bh->b_size; min_offs = end - base; From ffe498237b36ee42624e139b21efa05da4ff1f48 Mon Sep 17 00:00:00 2001 From: Chinh T Cao <chinh.t.cao@intel.com> Date: Mon, 5 Nov 2018 12:18:35 -0800 Subject: [PATCH 241/443] ice: Change req_speeds to be u16 Since the req_speeds field in struct ice_link_status is a u8, req_speeds & ICE_AQ_LINK_SPEED_40GB always returns 0. This was caught by a coverity scan. Fix this by changing req_speeds to be u16. Reported-by: Bruce Allan <bruce.w.allan@intel.com> Signed-off-by: Chinh T Cao <chinh.t.cao@intel.com> Signed-off-by: Anirudh Venkataramanan <anirudh.venkataramanan@intel.com> Tested-by: Andrew Bowers <andrewx.bowers@intel.com> Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com> --- drivers/net/ethernet/intel/ice/ice_type.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/intel/ice/ice_type.h b/drivers/net/ethernet/intel/ice/ice_type.h index 12f9432abf110..f4dbc81c19886 100644 --- a/drivers/net/ethernet/intel/ice/ice_type.h +++ b/drivers/net/ethernet/intel/ice/ice_type.h @@ -92,12 +92,12 @@ struct ice_link_status { u64 phy_type_low; u16 max_frame_size; u16 link_speed; + u16 req_speeds; u8 lse_ena; /* Link Status Event notification */ u8 link_info; u8 an_info; u8 ext_info; u8 pacing; - u8 req_speeds; /* Refer to #define from module_type[ICE_MODULE_TYPE_TOTAL_BYTE] of * ice_aqc_get_phy_caps structure */ From ba766b8b99c30ad3c55ed8cf224d1185ecff1476 Mon Sep 17 00:00:00 2001 From: Jacob Keller <jacob.e.keller@intel.com> Date: Mon, 29 Oct 2018 10:52:42 -0700 Subject: [PATCH 242/443] i40e: restore NETIF_F_GSO_IPXIP[46] to netdev features Since commit bacd75cfac8a ("i40e/i40evf: Add capability exchange for outer checksum", 2017-04-06) the i40e driver has not reported support for IP-in-IP offloads. This likely occurred due to a bad rebase, as the commit extracts hw_enc_features into its own variable. As part of this change, it dropped the NETIF_F_FSO_IPXIP flags from the netdev->hw_enc_features. This was unfortunately not caught during code review. Fix this by adding back the missing feature flags. For reference, NETIF_F_GSO_IPXIP4 was added in commit 7e13318daa4a ("net: define gso types for IPx over IPv4 and IPv6", 2016-05-20), replacing NETIF_F_GSO_IPIP and NETIF_F_GSO_SIT. NETIF_F_GSO_IPXIP6 was added in commit bf2d1df39502 ("intel: Add support for IPv6 IP-in-IP offload", 2016-05-20). Signed-off-by: Jacob Keller <jacob.e.keller@intel.com> Tested-by: Andrew Bowers <andrewx.bowers@intel.com> Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com> --- drivers/net/ethernet/intel/i40e/i40e_main.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c index bc71a21c1dc2c..3ff5ee49818b0 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_main.c +++ b/drivers/net/ethernet/intel/i40e/i40e_main.c @@ -12249,6 +12249,8 @@ static int i40e_config_netdev(struct i40e_vsi *vsi) NETIF_F_GSO_GRE | NETIF_F_GSO_GRE_CSUM | NETIF_F_GSO_PARTIAL | + NETIF_F_GSO_IPXIP4 | + NETIF_F_GSO_IPXIP6 | NETIF_F_GSO_UDP_TUNNEL | NETIF_F_GSO_UDP_TUNNEL_CSUM | NETIF_F_SCTP_CRC | From d5596fd46770550873ab4c02bcb69f83d3d63f63 Mon Sep 17 00:00:00 2001 From: Jacob Keller <jacob.e.keller@intel.com> Date: Mon, 29 Oct 2018 10:52:43 -0700 Subject: [PATCH 243/443] i40e: enable NETIF_F_NTUPLE and NETIF_F_HW_TC at driver load The assignment of the feature flag NETIF_F_NTUPLE and NETIF_F_HW_TC occurs prior to the initial setup of the local hw_features variable. This means the features are set as user-changeable, but are not set in the currently active feature list. This results in the features being disabled at the driver's initial load. Move the assignment after the initial assignment of hw_features, and assign to the local variable. This ensures that NETIF_F_NTUPLE and NETIF_F_HW_TC are marked as user-changeable, and also enables them by default when the driver loads. Signed-off-by: Jacob Keller <jacob.e.keller@intel.com> Tested-by: Andrew Bowers <andrewx.bowers@intel.com> Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com> --- drivers/net/ethernet/intel/i40e/i40e_main.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c index 3ff5ee49818b0..21c2688d63082 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_main.c +++ b/drivers/net/ethernet/intel/i40e/i40e_main.c @@ -12268,13 +12268,13 @@ static int i40e_config_netdev(struct i40e_vsi *vsi) /* record features VLANs can make use of */ netdev->vlan_features |= hw_enc_features | NETIF_F_TSO_MANGLEID; - if (!(pf->flags & I40E_FLAG_MFP_ENABLED)) - netdev->hw_features |= NETIF_F_NTUPLE | NETIF_F_HW_TC; - hw_features = hw_enc_features | NETIF_F_HW_VLAN_CTAG_TX | NETIF_F_HW_VLAN_CTAG_RX; + if (!(pf->flags & I40E_FLAG_MFP_ENABLED)) + hw_features |= NETIF_F_NTUPLE | NETIF_F_HW_TC; + netdev->hw_features |= hw_features; netdev->features |= hw_features | NETIF_F_HW_VLAN_CTAG_FILTER; From d6fd0ae25c6495674dc5a41a8d16bc8e0073276d Mon Sep 17 00:00:00 2001 From: Omar Sandoval <osandov@fb.com> Date: Wed, 31 Oct 2018 10:06:08 -0700 Subject: [PATCH 244/443] Btrfs: fix missing delayed iputs on unmount There's a race between close_ctree() and cleaner_kthread(). close_ctree() sets btrfs_fs_closing(), and the cleaner stops when it sees it set, but this is racy; the cleaner might have already checked the bit and could be cleaning stuff. In particular, if it deletes unused block groups, it will create delayed iputs for the free space cache inodes. As of "btrfs: don't run delayed_iputs in commit", we're no longer running delayed iputs after a commit. Therefore, if the cleaner creates more delayed iputs after delayed iputs are run in btrfs_commit_super(), we will leak inodes on unmount and get a busy inode crash from the VFS. Fix it by parking the cleaner before we actually close anything. Then, any remaining delayed iputs will always be handled in btrfs_commit_super(). This also ensures that the commit in close_ctree() is really the last commit, so we can get rid of the commit in cleaner_kthread(). The fstest/generic/475 followed by 476 can trigger a crash that manifests as a slab corruption caused by accessing the freed kthread structure by a wake up function. Sample trace: [ 5657.077612] BUG: unable to handle kernel NULL pointer dereference at 00000000000000cc [ 5657.079432] PGD 1c57a067 P4D 1c57a067 PUD da10067 PMD 0 [ 5657.080661] Oops: 0000 [#1] PREEMPT SMP [ 5657.081592] CPU: 1 PID: 5157 Comm: fsstress Tainted: G W 4.19.0-rc8-default+ #323 [ 5657.083703] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.11.2-0-gf9626cc-prebuilt.qemu-project.org 04/01/2014 [ 5657.086577] RIP: 0010:shrink_page_list+0x2f9/0xe90 [ 5657.091937] RSP: 0018:ffffb5c745c8f728 EFLAGS: 00010287 [ 5657.092953] RAX: 0000000000000074 RBX: ffffb5c745c8f830 RCX: 0000000000000000 [ 5657.094590] RDX: 0000000000000000 RSI: 0000000000000001 RDI: ffff9a8747fdf3d0 [ 5657.095987] RBP: ffffb5c745c8f9e0 R08: 0000000000000000 R09: 0000000000000000 [ 5657.097159] R10: ffff9a8747fdf5e8 R11: 0000000000000000 R12: ffffb5c745c8f788 [ 5657.098513] R13: ffff9a877f6ff2c0 R14: ffff9a877f6ff2c8 R15: dead000000000200 [ 5657.099689] FS: 00007f948d853b80(0000) GS:ffff9a877d600000(0000) knlGS:0000000000000000 [ 5657.101032] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 5657.101953] CR2: 00000000000000cc CR3: 00000000684bd000 CR4: 00000000000006e0 [ 5657.103159] Call Trace: [ 5657.103776] shrink_inactive_list+0x194/0x410 [ 5657.104671] shrink_node_memcg.constprop.84+0x39a/0x6a0 [ 5657.105750] shrink_node+0x62/0x1c0 [ 5657.106529] try_to_free_pages+0x1a4/0x500 [ 5657.107408] __alloc_pages_slowpath+0x2c9/0xb20 [ 5657.108418] __alloc_pages_nodemask+0x268/0x2b0 [ 5657.109348] kmalloc_large_node+0x37/0x90 [ 5657.110205] __kmalloc_node+0x236/0x310 [ 5657.111014] kvmalloc_node+0x3e/0x70 Fixes: 30928e9baac2 ("btrfs: don't run delayed_iputs in commit") Signed-off-by: Omar Sandoval <osandov@fb.com> Reviewed-by: David Sterba <dsterba@suse.com> [ add trace ] Signed-off-by: David Sterba <dsterba@suse.com> --- fs/btrfs/disk-io.c | 51 ++++++++++++++-------------------------------- 1 file changed, 15 insertions(+), 36 deletions(-) diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 00ee5e37e9897..3f0b6d1936e8e 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -1664,9 +1664,8 @@ static int cleaner_kthread(void *arg) struct btrfs_root *root = arg; struct btrfs_fs_info *fs_info = root->fs_info; int again; - struct btrfs_trans_handle *trans; - do { + while (1) { again = 0; /* Make the cleaner go to sleep early. */ @@ -1715,42 +1714,16 @@ static int cleaner_kthread(void *arg) */ btrfs_delete_unused_bgs(fs_info); sleep: + if (kthread_should_park()) + kthread_parkme(); + if (kthread_should_stop()) + return 0; if (!again) { set_current_state(TASK_INTERRUPTIBLE); - if (!kthread_should_stop()) - schedule(); + schedule(); __set_current_state(TASK_RUNNING); } - } while (!kthread_should_stop()); - - /* - * Transaction kthread is stopped before us and wakes us up. - * However we might have started a new transaction and COWed some - * tree blocks when deleting unused block groups for example. So - * make sure we commit the transaction we started to have a clean - * shutdown when evicting the btree inode - if it has dirty pages - * when we do the final iput() on it, eviction will trigger a - * writeback for it which will fail with null pointer dereferences - * since work queues and other resources were already released and - * destroyed by the time the iput/eviction/writeback is made. - */ - trans = btrfs_attach_transaction(root); - if (IS_ERR(trans)) { - if (PTR_ERR(trans) != -ENOENT) - btrfs_err(fs_info, - "cleaner transaction attach returned %ld", - PTR_ERR(trans)); - } else { - int ret; - - ret = btrfs_commit_transaction(trans); - if (ret) - btrfs_err(fs_info, - "cleaner open transaction commit returned %d", - ret); } - - return 0; } static int transaction_kthread(void *arg) @@ -3931,6 +3904,13 @@ void close_ctree(struct btrfs_fs_info *fs_info) int ret; set_bit(BTRFS_FS_CLOSING_START, &fs_info->flags); + /* + * We don't want the cleaner to start new transactions, add more delayed + * iputs, etc. while we're closing. We can't use kthread_stop() yet + * because that frees the task_struct, and the transaction kthread might + * still try to wake up the cleaner. + */ + kthread_park(fs_info->cleaner_kthread); /* wait for the qgroup rescan worker to stop */ btrfs_qgroup_wait_for_completion(fs_info, false); @@ -3958,9 +3938,8 @@ void close_ctree(struct btrfs_fs_info *fs_info) if (!sb_rdonly(fs_info->sb)) { /* - * If the cleaner thread is stopped and there are - * block groups queued for removal, the deletion will be - * skipped when we quit the cleaner thread. + * The cleaner kthread is stopped, so do one final pass over + * unused block groups. */ btrfs_delete_unused_bgs(fs_info); From df376b2ed51a2777c3398e038992f62523c0f932 Mon Sep 17 00:00:00 2001 From: Johannes Thumshirn <jthumshirn@suse.de> Date: Wed, 7 Nov 2018 14:58:14 +0100 Subject: [PATCH 245/443] block: respect virtual boundary mask in bvecs With drivers that are settting a virtual boundary constrain, we are seeing a lot of bio splitting and smaller I/Os being submitted to the driver. This happens because the bio gap detection code does not account cases where PAGE_SIZE - 1 is bigger than queue_virt_boundary() and thus will split the bio unnecessarily. Cc: Jan Kara <jack@suse.cz> Cc: Bart Van Assche <bvanassche@acm.org> Cc: Ming Lei <ming.lei@redhat.com> Reviewed-by: Sagi Grimberg <sagi@grimberg.me> Signed-off-by: Johannes Thumshirn <jthumshirn@suse.de> Acked-by: Keith Busch <keith.busch@intel.com> Reviewed-by: Ming Lei <ming.lei@redhat.com> Signed-off-by: Jens Axboe <axboe@kernel.dk> --- block/blk-merge.c | 2 +- block/blk.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/block/blk-merge.c b/block/blk-merge.c index 6b5ad275ed565..208658a901c65 100644 --- a/block/blk-merge.c +++ b/block/blk-merge.c @@ -46,7 +46,7 @@ static inline bool bio_will_gap(struct request_queue *q, bio_get_first_bvec(prev_rq->bio, &pb); else bio_get_first_bvec(prev, &pb); - if (pb.bv_offset) + if (pb.bv_offset & queue_virt_boundary(q)) return true; /* diff --git a/block/blk.h b/block/blk.h index a1841b8ff1296..c85e53f21cdd8 100644 --- a/block/blk.h +++ b/block/blk.h @@ -169,7 +169,7 @@ static inline bool biovec_phys_mergeable(struct request_queue *q, static inline bool __bvec_gap_to_prev(struct request_queue *q, struct bio_vec *bprv, unsigned int offset) { - return offset || + return (offset & queue_virt_boundary(q)) || ((bprv->bv_offset + bprv->bv_len) & queue_virt_boundary(q)); } From 3a40cfe8ba3abba57af2c7e26aad2c6dd1681864 Mon Sep 17 00:00:00 2001 From: Baolin Wang <baolin.wang@linaro.org> Date: Wed, 7 Nov 2018 13:42:58 +0800 Subject: [PATCH 246/443] leds: trigger: Fix sleeping function called from invalid context We will meet below issue due to mutex_lock() is called in interrupt context. The mutex lock is used to protect the pattern trigger data, but before changing new pattern trigger data (pattern values or repeat value) by users, we always cancel the timer firstly to clear previous patterns' performance. That means there is no race in pattern_trig_timer_function(), so we can drop the mutex lock in pattern_trig_timer_function() to avoid this issue. Moreover we can move the timer cancelling into mutex protection, since there is no deadlock risk if we remove the mutex lock in pattern_trig_timer_function(). BUG: sleeping function called from invalid context at kernel/locking/mutex.c:254 in_atomic(): 1, irqs_disabled(): 0, pid: 0, name: swapper/1 CPU: 1 PID: 0 Comm: swapper/1 Not tainted 4.20.0-rc1-koelsch-00841-ga338c8181013c1a9 #171 Hardware name: Generic R-Car Gen2 (Flattened Device Tree) [<c020f19c>] (unwind_backtrace) from [<c020aecc>] (show_stack+0x10/0x14) [<c020aecc>] (show_stack) from [<c07affb8>] (dump_stack+0x7c/0x9c) [<c07affb8>] (dump_stack) from [<c02417d4>] (___might_sleep+0xf4/0x158) [<c02417d4>] (___might_sleep) from [<c07c92c4>] (mutex_lock+0x18/0x60) [<c07c92c4>] (mutex_lock) from [<c067b28c>] (pattern_trig_timer_function+0x1c/0x11c) [<c067b28c>] (pattern_trig_timer_function) from [<c027f6fc>] (call_timer_fn+0x1c/0x90) [<c027f6fc>] (call_timer_fn) from [<c027f944>] (expire_timers+0x94/0xa4) [<c027f944>] (expire_timers) from [<c027fc98>] (run_timer_softirq+0x108/0x15c) [<c027fc98>] (run_timer_softirq) from [<c02021cc>] (__do_softirq+0x1d4/0x258) [<c02021cc>] (__do_softirq) from [<c0224d24>] (irq_exit+0x64/0xc4) [<c0224d24>] (irq_exit) from [<c0268dd0>] (__handle_domain_irq+0x80/0xb4) [<c0268dd0>] (__handle_domain_irq) from [<c045e1b0>] (gic_handle_irq+0x58/0x90) [<c045e1b0>] (gic_handle_irq) from [<c02019f8>] (__irq_svc+0x58/0x74) Exception stack(0xeb483f60 to 0xeb483fa8) 3f60: 00000000 00000000 eb9afaa0 c0217e80 00000000 ffffe000 00000000 c0e06408 3f80: 00000002 c0e0647c c0c6a5f0 00000000 c0e04900 eb483fb0 c0207ea8 c0207e98 3fa0: 60020013 ffffffff [<c02019f8>] (__irq_svc) from [<c0207e98>] (arch_cpu_idle+0x1c/0x38) [<c0207e98>] (arch_cpu_idle) from [<c0247ca8>] (do_idle+0x138/0x268) [<c0247ca8>] (do_idle) from [<c0248050>] (cpu_startup_entry+0x18/0x1c) [<c0248050>] (cpu_startup_entry) from [<402022ec>] (0x402022ec) Fixes: 5fd752b6b3a2 ("leds: core: Introduce LED pattern trigger") Signed-off-by: Baolin Wang <baolin.wang@linaro.org> Reported-by: Geert Uytterhoeven <geert+renesas@glider.be> Tested-by: Geert Uytterhoeven <geert+renesas@glider.be> Signed-off-by: Jacek Anaszewski <jacek.anaszewski@gmail.com> --- drivers/leds/trigger/ledtrig-pattern.c | 20 ++++---------------- 1 file changed, 4 insertions(+), 16 deletions(-) diff --git a/drivers/leds/trigger/ledtrig-pattern.c b/drivers/leds/trigger/ledtrig-pattern.c index 174a298f1be02..1870cf87afe1e 100644 --- a/drivers/leds/trigger/ledtrig-pattern.c +++ b/drivers/leds/trigger/ledtrig-pattern.c @@ -75,8 +75,6 @@ static void pattern_trig_timer_function(struct timer_list *t) { struct pattern_trig_data *data = from_timer(data, t, timer); - mutex_lock(&data->lock); - for (;;) { if (!data->is_indefinite && !data->repeat) break; @@ -117,8 +115,6 @@ static void pattern_trig_timer_function(struct timer_list *t) break; } - - mutex_unlock(&data->lock); } static int pattern_trig_start_pattern(struct led_classdev *led_cdev) @@ -177,14 +173,10 @@ static ssize_t repeat_store(struct device *dev, struct device_attribute *attr, if (res < -1 || res == 0) return -EINVAL; - /* - * Clear previous patterns' performence firstly, and remove the timer - * without mutex lock to avoid dead lock. - */ - del_timer_sync(&data->timer); - mutex_lock(&data->lock); + del_timer_sync(&data->timer); + if (data->is_hw_pattern) led_cdev->pattern_clear(led_cdev); @@ -235,14 +227,10 @@ static ssize_t pattern_trig_store_patterns(struct led_classdev *led_cdev, struct pattern_trig_data *data = led_cdev->trigger_data; int ccount, cr, offset = 0, err = 0; - /* - * Clear previous patterns' performence firstly, and remove the timer - * without mutex lock to avoid dead lock. - */ - del_timer_sync(&data->timer); - mutex_lock(&data->lock); + del_timer_sync(&data->timer); + if (data->is_hw_pattern) led_cdev->pattern_clear(led_cdev); From 406e7f986b2e4499295351bcfff5c93b0d34022a Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven <geert+renesas@glider.be> Date: Wed, 7 Nov 2018 14:45:24 +0100 Subject: [PATCH 247/443] Documentation: ABI: led-trigger-pattern: Fix typos - Spelling s/brigntess/brightness/, - Double "use". Signed-off-by: Geert Uytterhoeven <geert+renesas@glider.be> Acked-by: Pavel Machek <pavel@ucw.cz> Signed-off-by: Jacek Anaszewski <jacek.anaszewski@gmail.com> --- Documentation/ABI/testing/sysfs-class-led-trigger-pattern | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Documentation/ABI/testing/sysfs-class-led-trigger-pattern b/Documentation/ABI/testing/sysfs-class-led-trigger-pattern index fb3d1e03b8819..1e5d172e06462 100644 --- a/Documentation/ABI/testing/sysfs-class-led-trigger-pattern +++ b/Documentation/ABI/testing/sysfs-class-led-trigger-pattern @@ -37,8 +37,8 @@ Description: 0-| / \/ \/ +---0----1----2----3----4----5----6------------> time (s) - 2. To make the LED go instantly from one brigntess value to another, - we should use use zero-time lengths (the brightness must be same as + 2. To make the LED go instantly from one brightness value to another, + we should use zero-time lengths (the brightness must be same as the previous tuple's). So the format should be: "brightness_1 duration_1 brightness_1 0 brightness_2 duration_2 brightness_2 0 ...". For example: From cf3d02a185edf449d30465dae8d22a4979545829 Mon Sep 17 00:00:00 2001 From: Sean Paul <seanpaul@chromium.org> Date: Wed, 7 Nov 2018 15:55:33 -0500 Subject: [PATCH 248/443] drm: Fix htmldocs warnings in drm_fourcc.c Add a description for dev and remove the excess one for native. Fixes the following warnings: ../drivers/gpu/drm/drm_fourcc.c:112: warning: Function parameter or member 'dev' not described in 'drm_driver_legacy_fb_format' ../drivers/gpu/drm/drm_fourcc.c:112: warning: Excess function parameter 'native' description in 'drm_driver_legacy_fb_format' Fixes: 059b5eb5d955 ("drm: move native byte order quirk to new drm_driver_legacy_fb_format function") Cc: Gerd Hoffmann <kraxel@redhat.com> Cc: Daniel Vetter <daniel.vetter@ffwll.ch> Reviewed-by: Daniel Vetter <daniel.vetter@ffwll.ch> Signed-off-by: Sean Paul <seanpaul@chromium.org> Link: https://patchwork.freedesktop.org/patch/msgid/20181107205546.216088-1-sean@poorly.run --- drivers/gpu/drm/drm_fourcc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/drm_fourcc.c b/drivers/gpu/drm/drm_fourcc.c index 90a1c846fc25a..8aaa5e86a979c 100644 --- a/drivers/gpu/drm/drm_fourcc.c +++ b/drivers/gpu/drm/drm_fourcc.c @@ -97,9 +97,9 @@ EXPORT_SYMBOL(drm_mode_legacy_fb_format); /** * drm_driver_legacy_fb_format - compute drm fourcc code from legacy description + * @dev: DRM device * @bpp: bits per pixels * @depth: bit depth per pixel - * @native: use host native byte order * * Computes a drm fourcc pixel format code for the given @bpp/@depth values. * Unlike drm_mode_legacy_fb_format() this looks at the drivers mode_config, From 6961cd4d0fde97f1cdb798ba21cf124ecfa0bf95 Mon Sep 17 00:00:00 2001 From: Jens Axboe <axboe@kernel.dk> Date: Wed, 7 Nov 2018 14:34:05 -0700 Subject: [PATCH 249/443] ubd: fix missing lock around request issue We need to hold the device lock (and disable interrupts) while writing new commands, or we could be interrupted while that is happening and read invalid requests in the completion path. Fixes: 4e6da0fe8058 ("um: Convert ubd driver to blk-mq") Tested-by: Richard Weinberger <richard@nod.at> Signed-off-by: Jens Axboe <axboe@kernel.dk> --- arch/um/drivers/ubd_kern.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/arch/um/drivers/ubd_kern.c b/arch/um/drivers/ubd_kern.c index 74c002ddc0ce7..08831f5d83db0 100644 --- a/arch/um/drivers/ubd_kern.c +++ b/arch/um/drivers/ubd_kern.c @@ -1341,11 +1341,14 @@ static int ubd_queue_one_vec(struct blk_mq_hw_ctx *hctx, struct request *req, static blk_status_t ubd_queue_rq(struct blk_mq_hw_ctx *hctx, const struct blk_mq_queue_data *bd) { + struct ubd *ubd_dev = hctx->queue->queuedata; struct request *req = bd->rq; int ret = 0; blk_mq_start_request(req); + spin_lock_irq(&ubd_dev->lock); + if (req_op(req) == REQ_OP_FLUSH) { ret = ubd_queue_one_vec(hctx, req, 0, NULL); } else { @@ -1361,9 +1364,11 @@ static blk_status_t ubd_queue_rq(struct blk_mq_hw_ctx *hctx, } } out: - if (ret < 0) { + spin_unlock_irq(&ubd_dev->lock); + + if (ret < 0) blk_mq_requeue_request(req, true); - } + return BLK_STS_OK; } From e31d36b0a453d581fb077fce6883811c5e14874d Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven <geert+renesas@glider.be> Date: Wed, 7 Nov 2018 10:47:57 +0100 Subject: [PATCH 250/443] MAINTAINERS: Fix remaining pointers to obsolete libata.git libata.git no longer exists. Replace the remaining pointers to it by pointers to the block tree, which is where all libata development happens now. Signed-off-by: Geert Uytterhoeven <geert+renesas@glider.be> Signed-off-by: Jens Axboe <axboe@kernel.dk> --- MAINTAINERS | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/MAINTAINERS b/MAINTAINERS index 2b928aa857ce4..262ece39cc471 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -8343,7 +8343,7 @@ F: drivers/media/dvb-frontends/lgdt3305.* LIBATA PATA ARASAN COMPACT FLASH CONTROLLER M: Viresh Kumar <vireshk@kernel.org> L: linux-ide@vger.kernel.org -T: git git://git.kernel.org/pub/scm/linux/kernel/git/tj/libata.git +T: git git://git.kernel.org/pub/scm/linux/kernel/git/axboe/linux-block.git S: Maintained F: include/linux/pata_arasan_cf_data.h F: drivers/ata/pata_arasan_cf.c @@ -8360,7 +8360,7 @@ F: drivers/ata/ata_generic.c LIBATA PATA FARADAY FTIDE010 AND GEMINI SATA BRIDGE DRIVERS M: Linus Walleij <linus.walleij@linaro.org> L: linux-ide@vger.kernel.org -T: git git://git.kernel.org/pub/scm/linux/kernel/git/tj/libata.git +T: git git://git.kernel.org/pub/scm/linux/kernel/git/axboe/linux-block.git S: Maintained F: drivers/ata/pata_ftide010.c F: drivers/ata/sata_gemini.c @@ -8379,7 +8379,7 @@ F: include/linux/ahci_platform.h LIBATA SATA PROMISE TX2/TX4 CONTROLLER DRIVER M: Mikael Pettersson <mikpelinux@gmail.com> L: linux-ide@vger.kernel.org -T: git git://git.kernel.org/pub/scm/linux/kernel/git/tj/libata.git +T: git git://git.kernel.org/pub/scm/linux/kernel/git/axboe/linux-block.git S: Maintained F: drivers/ata/sata_promise.* From f3587d76da05f68098ddb1cb3c98cc6a9e8a402c Mon Sep 17 00:00:00 2001 From: Keith Busch <keith.busch@intel.com> Date: Wed, 7 Nov 2018 07:37:45 -0700 Subject: [PATCH 251/443] block: Clear kernel memory before copying to user If the kernel allocates a bounce buffer for user read data, this memory needs to be cleared before copying it to the user, otherwise it may leak kernel memory to user space. Laurence Oberman <loberman@redhat.com> Signed-off-by: Keith Busch <keith.busch@intel.com> Signed-off-by: Jens Axboe <axboe@kernel.dk> --- block/bio.c | 1 + 1 file changed, 1 insertion(+) diff --git a/block/bio.c b/block/bio.c index 4a5a036268fb8..9a9c590675213 100644 --- a/block/bio.c +++ b/block/bio.c @@ -1260,6 +1260,7 @@ struct bio *bio_copy_user_iov(struct request_queue *q, if (ret) goto cleanup; } else { + zero_fill_bio(bio); iov_iter_advance(iter, bio->bi_iter.bi_size); } From 3c7eda0b65ad279dd2f26908d2f4bd088da93996 Mon Sep 17 00:00:00 2001 From: Evan Quan <evan.quan@amd.com> Date: Wed, 24 Oct 2018 12:11:35 +0800 Subject: [PATCH 252/443] drm/amd/powerplay: set a default fclk/gfxclk ratio Otherwise big gap between these two clocks may causes some hangs. Signed-off-by: Evan Quan <evan.quan@amd.com> Reviewed-by: Feifei Xu <Feifei.Xu@amd.com> Signed-off-by: Alex Deucher <alexander.deucher@amd.com> --- .../gpu/drm/amd/powerplay/hwmgr/vega20_hwmgr.c | 16 ++++++++++++++++ .../gpu/drm/amd/powerplay/hwmgr/vega20_hwmgr.h | 1 + drivers/gpu/drm/amd/powerplay/inc/vega20_ppsmc.h | 3 ++- 3 files changed, 19 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/vega20_hwmgr.c b/drivers/gpu/drm/amd/powerplay/hwmgr/vega20_hwmgr.c index 57143d51e3eed..8bf76108cf393 100644 --- a/drivers/gpu/drm/amd/powerplay/hwmgr/vega20_hwmgr.c +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/vega20_hwmgr.c @@ -120,6 +120,7 @@ static void vega20_set_default_registry_data(struct pp_hwmgr *hwmgr) data->registry_data.disable_auto_wattman = 1; data->registry_data.auto_wattman_debug = 0; data->registry_data.auto_wattman_sample_period = 100; + data->registry_data.fclk_gfxclk_ratio = 0x3F6CCCCD; data->registry_data.auto_wattman_threshold = 50; data->registry_data.gfxoff_controlled_by_driver = 1; data->gfxoff_allowed = false; @@ -829,6 +830,16 @@ static int vega20_enable_all_smu_features(struct pp_hwmgr *hwmgr) return 0; } +static int vega20_send_clock_ratio(struct pp_hwmgr *hwmgr) +{ + struct vega20_hwmgr *data = + (struct vega20_hwmgr *)(hwmgr->backend); + + return smum_send_msg_to_smc_with_parameter(hwmgr, + PPSMC_MSG_SetFclkGfxClkRatio, + data->registry_data.fclk_gfxclk_ratio); +} + static int vega20_disable_all_smu_features(struct pp_hwmgr *hwmgr) { struct vega20_hwmgr *data = @@ -1532,6 +1543,11 @@ static int vega20_enable_dpm_tasks(struct pp_hwmgr *hwmgr) "[EnableDPMTasks] Failed to enable all smu features!", return result); + result = vega20_send_clock_ratio(hwmgr); + PP_ASSERT_WITH_CODE(!result, + "[EnableDPMTasks] Failed to send clock ratio!", + return result); + /* Initialize UVD/VCE powergating state */ vega20_init_powergate_state(hwmgr); diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/vega20_hwmgr.h b/drivers/gpu/drm/amd/powerplay/hwmgr/vega20_hwmgr.h index 56fe6a0d42e80..25faaa5c5b10c 100644 --- a/drivers/gpu/drm/amd/powerplay/hwmgr/vega20_hwmgr.h +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/vega20_hwmgr.h @@ -328,6 +328,7 @@ struct vega20_registry_data { uint8_t disable_auto_wattman; uint32_t auto_wattman_debug; uint32_t auto_wattman_sample_period; + uint32_t fclk_gfxclk_ratio; uint8_t auto_wattman_threshold; uint8_t log_avfs_param; uint8_t enable_enginess; diff --git a/drivers/gpu/drm/amd/powerplay/inc/vega20_ppsmc.h b/drivers/gpu/drm/amd/powerplay/inc/vega20_ppsmc.h index 45d64a81e9453..4f63a736ea0e7 100644 --- a/drivers/gpu/drm/amd/powerplay/inc/vega20_ppsmc.h +++ b/drivers/gpu/drm/amd/powerplay/inc/vega20_ppsmc.h @@ -105,7 +105,8 @@ #define PPSMC_MSG_SetSystemVirtualDramAddrHigh 0x4B #define PPSMC_MSG_SetSystemVirtualDramAddrLow 0x4C #define PPSMC_MSG_WaflTest 0x4D -// Unused ID 0x4E to 0x50 +#define PPSMC_MSG_SetFclkGfxClkRatio 0x4E +// Unused ID 0x4F to 0x50 #define PPSMC_MSG_AllowGfxOff 0x51 #define PPSMC_MSG_DisallowGfxOff 0x52 #define PPSMC_MSG_GetPptLimit 0x53 From 108110a3ffa3483d743f3bff93917ba64dc8edd0 Mon Sep 17 00:00:00 2001 From: Evan Quan <evan.quan@amd.com> Date: Wed, 7 Nov 2018 09:16:07 +0800 Subject: [PATCH 253/443] drm/amd/powerplay: always use fast UCLK switching when UCLK DPM enabled With UCLK DPM enabled, slow switching is not supported any more. Signed-off-by: Evan Quan <evan.quan@amd.com> Reviewed-by: Feifei Xu <Feifei.Xu@amd.com> Signed-off-by: Alex Deucher <alexander.deucher@amd.com> --- .../drm/amd/powerplay/hwmgr/vega20_hwmgr.c | 37 +++++++++---------- 1 file changed, 17 insertions(+), 20 deletions(-) diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/vega20_hwmgr.c b/drivers/gpu/drm/amd/powerplay/hwmgr/vega20_hwmgr.c index 8bf76108cf393..99861f32b1f95 100644 --- a/drivers/gpu/drm/amd/powerplay/hwmgr/vega20_hwmgr.c +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/vega20_hwmgr.c @@ -830,6 +830,18 @@ static int vega20_enable_all_smu_features(struct pp_hwmgr *hwmgr) return 0; } +static int vega20_notify_smc_display_change(struct pp_hwmgr *hwmgr) +{ + struct vega20_hwmgr *data = (struct vega20_hwmgr *)(hwmgr->backend); + + if (data->smu_features[GNLD_DPM_UCLK].enabled) + return smum_send_msg_to_smc_with_parameter(hwmgr, + PPSMC_MSG_SetUclkFastSwitch, + 1); + + return 0; +} + static int vega20_send_clock_ratio(struct pp_hwmgr *hwmgr) { struct vega20_hwmgr *data = @@ -1543,6 +1555,11 @@ static int vega20_enable_dpm_tasks(struct pp_hwmgr *hwmgr) "[EnableDPMTasks] Failed to enable all smu features!", return result); + result = vega20_notify_smc_display_change(hwmgr); + PP_ASSERT_WITH_CODE(!result, + "[EnableDPMTasks] Failed to notify smc display change!", + return result); + result = vega20_send_clock_ratio(hwmgr); PP_ASSERT_WITH_CODE(!result, "[EnableDPMTasks] Failed to send clock ratio!", @@ -1988,19 +2005,6 @@ static int vega20_read_sensor(struct pp_hwmgr *hwmgr, int idx, return ret; } -static int vega20_notify_smc_display_change(struct pp_hwmgr *hwmgr, - bool has_disp) -{ - struct vega20_hwmgr *data = (struct vega20_hwmgr *)(hwmgr->backend); - - if (data->smu_features[GNLD_DPM_UCLK].enabled) - return smum_send_msg_to_smc_with_parameter(hwmgr, - PPSMC_MSG_SetUclkFastSwitch, - has_disp ? 1 : 0); - - return 0; -} - int vega20_display_clock_voltage_request(struct pp_hwmgr *hwmgr, struct pp_display_clock_request *clock_req) { @@ -2060,13 +2064,6 @@ static int vega20_notify_smc_display_config_after_ps_adjustment( struct pp_display_clock_request clock_req; int ret = 0; - if ((hwmgr->display_config->num_display > 1) && - !hwmgr->display_config->multi_monitor_in_sync && - !hwmgr->display_config->nb_pstate_switch_disable) - vega20_notify_smc_display_change(hwmgr, false); - else - vega20_notify_smc_display_change(hwmgr, true); - min_clocks.dcefClock = hwmgr->display_config->min_dcef_set_clk; min_clocks.dcefClockInSR = hwmgr->display_config->min_dcef_deep_sleep_set_clk; min_clocks.memoryClock = hwmgr->display_config->min_mem_set_clock; From 8be17ac95f8451b51f636622d8d4b8674334f468 Mon Sep 17 00:00:00 2001 From: "Jerry (Fangzhi) Zuo" <Jerry.Zuo@amd.com> Date: Tue, 30 Oct 2018 14:28:49 -0400 Subject: [PATCH 254/443] drm/amd/display: Cleanup MST non-atomic code workaround [why] It is not correct to touch aconnector within atomic_check. [How] It was added as workaround before, and no longer needed. Signed-off-by: Jerry (Fangzhi) Zuo <Jerry.Zuo@amd.com> Reviewed-by: Harry Wentland <harry.wentland@amd.com> Reviewed-by: Lyude Paul <lyude@redhat.com> Signed-off-by: Alex Deucher <alexander.deucher@amd.com> --- .../gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 15 +++----- .../display/amdgpu_dm/amdgpu_dm_mst_types.c | 34 ------------------- .../display/amdgpu_dm/amdgpu_dm_mst_types.h | 1 - 3 files changed, 4 insertions(+), 46 deletions(-) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index d3f5cb1795bf8..c1262f62cd9f2 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -2703,18 +2703,11 @@ create_stream_for_sink(struct amdgpu_dm_connector *aconnector, drm_connector = &aconnector->base; if (!aconnector->dc_sink) { - /* - * Create dc_sink when necessary to MST - * Don't apply fake_sink to MST - */ - if (aconnector->mst_port) { - dm_dp_mst_dc_sink_create(drm_connector); - return stream; + if (!aconnector->mst_port) { + sink = create_fake_sink(aconnector); + if (!sink) + return stream; } - - sink = create_fake_sink(aconnector); - if (!sink) - return stream; } else { sink = aconnector->dc_sink; } diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c index 03601d717fed9..03e98967d6c2f 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c @@ -205,40 +205,6 @@ static const struct drm_connector_funcs dm_dp_mst_connector_funcs = { .atomic_get_property = amdgpu_dm_connector_atomic_get_property }; -void dm_dp_mst_dc_sink_create(struct drm_connector *connector) -{ - struct amdgpu_dm_connector *aconnector = to_amdgpu_dm_connector(connector); - struct dc_sink *dc_sink; - struct dc_sink_init_data init_params = { - .link = aconnector->dc_link, - .sink_signal = SIGNAL_TYPE_DISPLAY_PORT_MST }; - - /* FIXME none of this is safe. we shouldn't touch aconnector here in - * atomic_check - */ - - /* - * TODO: Need to further figure out why ddc.algo is NULL while MST port exists - */ - if (!aconnector->port || !aconnector->port->aux.ddc.algo) - return; - - ASSERT(aconnector->edid); - - dc_sink = dc_link_add_remote_sink( - aconnector->dc_link, - (uint8_t *)aconnector->edid, - (aconnector->edid->extensions + 1) * EDID_LENGTH, - &init_params); - - dc_sink->priv = aconnector; - aconnector->dc_sink = dc_sink; - - if (aconnector->dc_sink) - amdgpu_dm_update_freesync_caps( - connector, aconnector->edid); -} - static int dm_dp_mst_get_modes(struct drm_connector *connector) { struct amdgpu_dm_connector *aconnector = to_amdgpu_dm_connector(connector); diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.h b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.h index 8cf51da26657e..2da851b40042a 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.h +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.h @@ -31,6 +31,5 @@ struct amdgpu_dm_connector; void amdgpu_dm_initialize_dp_connector(struct amdgpu_display_manager *dm, struct amdgpu_dm_connector *aconnector); -void dm_dp_mst_dc_sink_create(struct drm_connector *connector); #endif From 0e6613e46fed29316f33acf86e1d1568288638b5 Mon Sep 17 00:00:00 2001 From: "Jerry (Fangzhi) Zuo" <Jerry.Zuo@amd.com> Date: Tue, 30 Oct 2018 14:37:16 -0400 Subject: [PATCH 255/443] drm/amd/display: Drop reusing drm connector for MST [why] It is not safe to keep existing connector while entire topology has been removed. Could lead potential impact to uapi. Entirely unregister all the connectors on the topology, and use a new set of connectors when the topology is plugged back on. [How] Remove the drm connector entirely each time when the corresponding MST topology is gone. When hotunplug a connector (e.g., DP2) 1. Remove connector from userspace. 2. Drop it's reference. When hotplug back on: 1. Detect new topology, and create new connectors. 2. Notify userspace with sysfs hotplug event. 3. Reprobe new connectors, and reassign CRTC from old (e.g., DP2) to new (e.g., DP3) connector. Signed-off-by: Jerry (Fangzhi) Zuo <Jerry.Zuo@amd.com> Reviewed-by: Harry Wentland <harry.wentland@amd.com> Reviewed-by: Lyude Paul <lyude@redhat.com> Signed-off-by: Alex Deucher <alexander.deucher@amd.com> --- .../gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h | 2 - .../display/amdgpu_dm/amdgpu_dm_mst_types.c | 40 ++++--------------- 2 files changed, 7 insertions(+), 35 deletions(-) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h index 978b34a5011ce..924a38a1fc446 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h @@ -160,8 +160,6 @@ struct amdgpu_dm_connector { struct mutex hpd_lock; bool fake_enable; - - bool mst_connected; }; #define to_amdgpu_dm_connector(x) container_of(x, struct amdgpu_dm_connector, base) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c index 03e98967d6c2f..31126eb23a740 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c @@ -320,25 +320,6 @@ dm_dp_add_mst_connector(struct drm_dp_mst_topology_mgr *mgr, struct amdgpu_device *adev = dev->dev_private; struct amdgpu_dm_connector *aconnector; struct drm_connector *connector; - struct drm_connector_list_iter conn_iter; - - drm_connector_list_iter_begin(dev, &conn_iter); - drm_for_each_connector_iter(connector, &conn_iter) { - aconnector = to_amdgpu_dm_connector(connector); - if (aconnector->mst_port == master - && !aconnector->port) { - DRM_INFO("DM_MST: reusing connector: %p [id: %d] [master: %p]\n", - aconnector, connector->base.id, aconnector->mst_port); - - aconnector->port = port; - drm_connector_set_path_property(connector, pathprop); - - drm_connector_list_iter_end(&conn_iter); - aconnector->mst_connected = true; - return &aconnector->base; - } - } - drm_connector_list_iter_end(&conn_iter); aconnector = kzalloc(sizeof(*aconnector), GFP_KERNEL); if (!aconnector) @@ -387,8 +368,6 @@ dm_dp_add_mst_connector(struct drm_dp_mst_topology_mgr *mgr, */ amdgpu_dm_connector_funcs_reset(connector); - aconnector->mst_connected = true; - DRM_INFO("DM_MST: added connector: %p [id: %d] [master: %p]\n", aconnector, connector->base.id, aconnector->mst_port); @@ -400,6 +379,9 @@ dm_dp_add_mst_connector(struct drm_dp_mst_topology_mgr *mgr, static void dm_dp_destroy_mst_connector(struct drm_dp_mst_topology_mgr *mgr, struct drm_connector *connector) { + struct amdgpu_dm_connector *master = container_of(mgr, struct amdgpu_dm_connector, mst_mgr); + struct drm_device *dev = master->base.dev; + struct amdgpu_device *adev = dev->dev_private; struct amdgpu_dm_connector *aconnector = to_amdgpu_dm_connector(connector); DRM_INFO("DM_MST: Disabling connector: %p [id: %d] [master: %p]\n", @@ -413,7 +395,10 @@ static void dm_dp_destroy_mst_connector(struct drm_dp_mst_topology_mgr *mgr, aconnector->dc_sink = NULL; } - aconnector->mst_connected = false; + drm_connector_unregister(connector); + if (adev->mode_info.rfbdev) + drm_fb_helper_remove_one_connector(&adev->mode_info.rfbdev->helper, connector); + drm_connector_put(connector); } static void dm_dp_mst_hotplug(struct drm_dp_mst_topology_mgr *mgr) @@ -424,18 +409,10 @@ static void dm_dp_mst_hotplug(struct drm_dp_mst_topology_mgr *mgr) drm_kms_helper_hotplug_event(dev); } -static void dm_dp_mst_link_status_reset(struct drm_connector *connector) -{ - mutex_lock(&connector->dev->mode_config.mutex); - drm_connector_set_link_status_property(connector, DRM_MODE_LINK_STATUS_BAD); - mutex_unlock(&connector->dev->mode_config.mutex); -} - static void dm_dp_mst_register_connector(struct drm_connector *connector) { struct drm_device *dev = connector->dev; struct amdgpu_device *adev = dev->dev_private; - struct amdgpu_dm_connector *aconnector = to_amdgpu_dm_connector(connector); if (adev->mode_info.rfbdev) drm_fb_helper_add_one_connector(&adev->mode_info.rfbdev->helper, connector); @@ -443,9 +420,6 @@ static void dm_dp_mst_register_connector(struct drm_connector *connector) DRM_ERROR("adev->mode_info.rfbdev is NULL\n"); drm_connector_register(connector); - - if (aconnector->mst_connected) - dm_dp_mst_link_status_reset(connector); } static const struct drm_dp_mst_topology_cbs dm_mst_cbs = { From 63237f8748bdf46dccf79ef8f98f05e9fe799162 Mon Sep 17 00:00:00 2001 From: Lyude Paul <lyude@redhat.com> Date: Thu, 1 Nov 2018 21:51:49 -0400 Subject: [PATCH 256/443] drm/amd/amdgpu/dm: Fix dm_dp_create_fake_mst_encoder() [why] Removing connector reusage from DM to match the rest of the tree ended up revealing an issue that was surprisingly subtle. The original amdgpu code for DC that was submitted appears to have left a chunk in dm_dp_create_fake_mst_encoder() that tries to find a "master encoder", the likes of which isn't actually used or stored anywhere. It does so at the wrong time as well by trying to access parts of the drm_connector from the encoder init before it's actually been initialized. This results in a NULL pointer deref on MST hotplugs: [ 160.696613] BUG: unable to handle kernel NULL pointer dereference at 0000000000000000 [ 160.697234] PGD 0 P4D 0 [ 160.697814] Oops: 0010 [#1] SMP PTI [ 160.698430] CPU: 2 PID: 64 Comm: kworker/2:1 Kdump: loaded Tainted: G O 4.19.0Lyude-Test+ #2 [ 160.699020] Hardware name: HP HP ZBook 15 G4/8275, BIOS P70 Ver. 01.22 05/17/2018 [ 160.699672] Workqueue: events_long drm_dp_mst_link_probe_work [drm_kms_helper] [ 160.700322] RIP: 0010: (null) [ 160.700920] Code: Bad RIP value. [ 160.701541] RSP: 0018:ffffc9000029fc78 EFLAGS: 00010206 [ 160.702183] RAX: 0000000000000000 RBX: ffff8804440ed468 RCX: ffff8804440e9158 [ 160.702778] RDX: 0000000000000000 RSI: ffff8804556c5700 RDI: ffff8804440ed000 [ 160.703408] RBP: ffff880458e21800 R08: 0000000000000002 R09: 000000005fca0a25 [ 160.704002] R10: ffff88045a077a3d R11: ffff88045a077a3c R12: ffff8804440ed000 [ 160.704614] R13: ffff880458e21800 R14: ffff8804440e9000 R15: ffff8804440e9000 [ 160.705260] FS: 0000000000000000(0000) GS:ffff88045f280000(0000) knlGS:0000000000000000 [ 160.705854] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 160.706478] CR2: ffffffffffffffd6 CR3: 000000000200a001 CR4: 00000000003606e0 [ 160.707124] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [ 160.707724] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 [ 160.708372] Call Trace: [ 160.708998] ? dm_dp_add_mst_connector+0xed/0x1d0 [amdgpu] [ 160.709625] ? drm_dp_add_port+0x2fa/0x470 [drm_kms_helper] [ 160.710284] ? wake_up_q+0x54/0x70 [ 160.710877] ? __mutex_unlock_slowpath.isra.18+0xb3/0x110 [ 160.711512] ? drm_dp_dpcd_access+0xe7/0x110 [drm_kms_helper] [ 160.712161] ? drm_dp_send_link_address+0x155/0x1e0 [drm_kms_helper] [ 160.712762] ? drm_dp_check_and_send_link_address+0xa3/0xd0 [drm_kms_helper] [ 160.713408] ? drm_dp_mst_link_probe_work+0x4b/0x80 [drm_kms_helper] [ 160.714013] ? process_one_work+0x1a1/0x3a0 [ 160.714667] ? worker_thread+0x30/0x380 [ 160.715326] ? wq_update_unbound_numa+0x10/0x10 [ 160.715939] ? kthread+0x112/0x130 [ 160.716591] ? kthread_create_worker_on_cpu+0x70/0x70 [ 160.717262] ? ret_from_fork+0x35/0x40 [ 160.717886] Modules linked in: amdgpu(O) vfat fat snd_hda_codec_generic joydev i915 chash gpu_sched ttm i2c_algo_bit drm_kms_helper snd_hda_codec_hdmi hp_wmi syscopyarea iTCO_wdt sysfillrect sparse_keymap sysimgblt fb_sys_fops snd_hda_intel usbhid wmi_bmof drm snd_hda_codec btusb snd_hda_core intel_rapl btrtl x86_pkg_temp_thermal btbcm btintel coretemp snd_pcm crc32_pclmul bluetooth psmouse snd_timer snd pcspkr i2c_i801 mei_me i2c_core soundcore mei tpm_tis wmi tpm_tis_core hp_accel ecdh_generic lis3lv02d tpm video rfkill acpi_pad input_polldev hp_wireless pcc_cpufreq crc32c_intel serio_raw tg3 xhci_pci xhci_hcd [last unloaded: amdgpu] [ 160.720141] CR2: 0000000000000000 Somehow the connector reusage DM was using for MST connectors managed to paper over this issue entirely; hence why this was never caught until now. [how] Since this code isn't used anywhere and seems useless anyway, we can just drop it entirely. This appears to fix the issue on my HP ZBook with an AMD WX4150. Signed-off-by: Lyude Paul <lyude@redhat.com> Reviewed-by: Harry Wentland <harry.wentland@amd.com> Signed-off-by: Alex Deucher <alexander.deucher@amd.com> --- drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c | 5 ----- 1 file changed, 5 deletions(-) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c index 31126eb23a740..d02c32a1039c0 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c @@ -285,12 +285,7 @@ dm_dp_create_fake_mst_encoder(struct amdgpu_dm_connector *connector) struct amdgpu_device *adev = dev->dev_private; struct amdgpu_encoder *amdgpu_encoder; struct drm_encoder *encoder; - const struct drm_connector_helper_funcs *connector_funcs = - connector->base.helper_private; - struct drm_encoder *enc_master = - connector_funcs->best_encoder(&connector->base); - DRM_DEBUG_KMS("enc master is %p\n", enc_master); amdgpu_encoder = kzalloc(sizeof(*amdgpu_encoder), GFP_KERNEL); if (!amdgpu_encoder) return NULL; From de59fae0043f07de5d25e02ca360f7d57bfa5866 Mon Sep 17 00:00:00 2001 From: Vasily Averin <vvs@virtuozzo.com> Date: Wed, 7 Nov 2018 22:36:23 -0500 Subject: [PATCH 257/443] ext4: fix buffer leak in __ext4_read_dirblock() on error path Fixes: dc6982ff4db1 ("ext4: refactor code to read directory blocks ...") Signed-off-by: Vasily Averin <vvs@virtuozzo.com> Signed-off-by: Theodore Ts'o <tytso@mit.edu> Cc: stable@kernel.org # 3.9 --- fs/ext4/namei.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c index d388cce72db20..6a6b90363ef1b 100644 --- a/fs/ext4/namei.c +++ b/fs/ext4/namei.c @@ -126,6 +126,7 @@ static struct buffer_head *__ext4_read_dirblock(struct inode *inode, if (!is_dx_block && type == INDEX) { ext4_error_inode(inode, func, line, block, "directory leaf block found instead of index block"); + brelse(bh); return ERR_PTR(-EFSCORRUPTED); } if (!ext4_has_metadata_csum(inode->i_sb) || From d2f007dbe7e4c9583eea6eb04d60001e85c6f1bd Mon Sep 17 00:00:00 2001 From: Jann Horn <jannh@google.com> Date: Mon, 5 Nov 2018 20:55:09 +0100 Subject: [PATCH 258/443] userns: also map extents in the reverse map to kernel IDs The current logic first clones the extent array and sorts both copies, then maps the lower IDs of the forward mapping into the lower namespace, but doesn't map the lower IDs of the reverse mapping. This means that code in a nested user namespace with >5 extents will see incorrect IDs. It also breaks some access checks, like inode_owner_or_capable() and privileged_wrt_inode_uidgid(), so a process can incorrectly appear to be capable relative to an inode. To fix it, we have to make sure that the "lower_first" members of extents in both arrays are translated; and we have to make sure that the reverse map is sorted *after* the translation (since otherwise the translation can break the sorting). This is CVE-2018-18955. Fixes: 6397fac4915a ("userns: bump idmap limits to 340") Cc: stable@vger.kernel.org Signed-off-by: Jann Horn <jannh@google.com> Tested-by: Eric W. Biederman <ebiederm@xmission.com> Reviewed-by: Eric W. Biederman <ebiederm@xmission.com> Signed-off-by: Eric W. Biederman <ebiederm@xmission.com> --- kernel/user_namespace.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/kernel/user_namespace.c b/kernel/user_namespace.c index e5222b5fb4fe6..923414a246e9e 100644 --- a/kernel/user_namespace.c +++ b/kernel/user_namespace.c @@ -974,10 +974,6 @@ static ssize_t map_write(struct file *file, const char __user *buf, if (!new_idmap_permitted(file, ns, cap_setid, &new_map)) goto out; - ret = sort_idmaps(&new_map); - if (ret < 0) - goto out; - ret = -EPERM; /* Map the lower ids from the parent user namespace to the * kernel global id space. @@ -1004,6 +1000,14 @@ static ssize_t map_write(struct file *file, const char __user *buf, e->lower_first = lower_first; } + /* + * If we want to use binary search for lookup, this clones the extent + * array and sorts both copies. + */ + ret = sort_idmaps(&new_map); + if (ret < 0) + goto out; + /* Install the map */ if (new_map.nr_extents <= UID_GID_MAP_MAX_BASE_EXTENTS) { memcpy(map->extent, new_map.extent, From 96ed82cc1f515a2329f93506129ab1de35429b89 Mon Sep 17 00:00:00 2001 From: "Maciej W. Rozycki" <macro@linux-mips.org> Date: Wed, 7 Nov 2018 12:06:52 +0000 Subject: [PATCH 259/443] FDDI: defza: Fix SPDX annotation The SPDX annotation for this driver does not match the license text, which specifies GNU GPL 2 or later. Make the two match by correcting the SPDX tag. Signed-off-by: Maciej W. Rozycki <macro@linux-mips.org> Signed-off-by: David S. Miller <davem@davemloft.net> --- drivers/net/fddi/defza.c | 2 +- drivers/net/fddi/defza.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/fddi/defza.c b/drivers/net/fddi/defza.c index 3b7f10a5f06a6..79980eeb0e438 100644 --- a/drivers/net/fddi/defza.c +++ b/drivers/net/fddi/defza.c @@ -1,4 +1,4 @@ -// SPDX-License-Identifier: GPL-2.0 +// SPDX-License-Identifier: GPL-2.0+ /* FDDI network adapter driver for DEC FDDIcontroller 700/700-C devices. * * Copyright (c) 2018 Maciej W. Rozycki diff --git a/drivers/net/fddi/defza.h b/drivers/net/fddi/defza.h index b06acf32738ea..ed659ed172151 100644 --- a/drivers/net/fddi/defza.h +++ b/drivers/net/fddi/defza.h @@ -1,4 +1,4 @@ -/* SPDX-License-Identifier: GPL-2.0 */ +/* SPDX-License-Identifier: GPL-2.0+ */ /* FDDI network adapter driver for DEC FDDIcontroller 700/700-C devices. * * Copyright (c) 2018 Maciej W. Rozycki From 5f5fae37dbcf37feae48c40d2580dbd67c5df131 Mon Sep 17 00:00:00 2001 From: "Maciej W. Rozycki" <macro@linux-mips.org> Date: Wed, 7 Nov 2018 12:06:59 +0000 Subject: [PATCH 260/443] FDDI: defza: Add missing comment closing Fix: drivers/net/fddi/defza.h:238:1: warning: "/*" within comment [-Wcomment] by adding a missing comment closing. Signed-off-by: Maciej W. Rozycki <macro@linux-mips.org> Signed-off-by: David S. Miller <davem@davemloft.net> --- drivers/net/fddi/defza.h | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/fddi/defza.h b/drivers/net/fddi/defza.h index ed659ed172151..93bda61be8e38 100644 --- a/drivers/net/fddi/defza.h +++ b/drivers/net/fddi/defza.h @@ -235,6 +235,7 @@ struct fza_ring_cmd { #define FZA_RING_CMD 0x200400 /* command ring address */ #define FZA_RING_CMD_SIZE 0x40 /* command descriptor ring * size + */ /* Command constants. */ #define FZA_RING_CMD_MASK 0x7fffffff #define FZA_RING_CMD_NOP 0x00000000 /* nop */ From 04453b6b241913f85af013eec187bab776428af5 Mon Sep 17 00:00:00 2001 From: "Maciej W. Rozycki" <macro@linux-mips.org> Date: Wed, 7 Nov 2018 12:07:05 +0000 Subject: [PATCH 261/443] FDDI: defza: Move SMT Tx data buffer declaration next to its skb Move the temporary data buffer used when tapping into the SMT Tx queue from the outer function level into the conditional block it's actually used in and its containing skb is also declared, making the structure of code better. Signed-off-by: Maciej W. Rozycki <macro@linux-mips.org> Signed-off-by: David S. Miller <davem@davemloft.net> --- drivers/net/fddi/defza.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/fddi/defza.c b/drivers/net/fddi/defza.c index 79980eeb0e438..bebd0d6d2320a 100644 --- a/drivers/net/fddi/defza.c +++ b/drivers/net/fddi/defza.c @@ -784,7 +784,7 @@ static void fza_rx(struct net_device *dev) static void fza_tx_smt(struct net_device *dev) { struct fza_private *fp = netdev_priv(dev); - struct fza_buffer_tx __iomem *smt_tx_ptr, *skb_data_ptr; + struct fza_buffer_tx __iomem *smt_tx_ptr; int i, len; u32 own; @@ -799,6 +799,7 @@ static void fza_tx_smt(struct net_device *dev) if (!netif_queue_stopped(dev)) { if (dev_nit_active(dev)) { + struct fza_buffer_tx *skb_data_ptr; struct sk_buff *skb; /* Length must be a multiple of 4 as only word From 8f5365ebf7b17c35dddbb694b4f0ffd1293a947f Mon Sep 17 00:00:00 2001 From: "Maciej W. Rozycki" <macro@linux-mips.org> Date: Wed, 7 Nov 2018 12:07:10 +0000 Subject: [PATCH 262/443] FDDI: defza: Make the driver version string constant The driver version string is obviously not meant to be changed at run time, so mark it `const'. Signed-off-by: Maciej W. Rozycki <macro@linux-mips.org> Signed-off-by: David S. Miller <davem@davemloft.net> --- drivers/net/fddi/defza.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/fddi/defza.c b/drivers/net/fddi/defza.c index bebd0d6d2320a..c5cae8e74dc40 100644 --- a/drivers/net/fddi/defza.c +++ b/drivers/net/fddi/defza.c @@ -56,7 +56,7 @@ #define DRV_VERSION "v.1.1.4" #define DRV_RELDATE "Oct 6 2018" -static char version[] = +static const char version[] = DRV_NAME ": " DRV_VERSION " " DRV_RELDATE " Maciej W. Rozycki\n"; MODULE_AUTHOR("Maciej W. Rozycki <macro@linux-mips.org>"); From 25d202ed820ee347edec0bf3bf553544556bf64b Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" <ebiederm@xmission.com> Date: Mon, 22 Oct 2018 10:21:38 -0500 Subject: [PATCH 263/443] mount: Retest MNT_LOCKED in do_umount It was recently pointed out that the one instance of testing MNT_LOCKED outside of the namespace_sem is in ksys_umount. Fix that by adding a test inside of do_umount with namespace_sem and the mount_lock held. As it helps to fail fails the existing test is maintained with an additional comment pointing out that it may be racy because the locks are not held. Cc: stable@vger.kernel.org Reported-by: Al Viro <viro@ZenIV.linux.org.uk> Fixes: 5ff9d8a65ce8 ("vfs: Lock in place mounts from more privileged users") Signed-off-by: "Eric W. Biederman" <ebiederm@xmission.com> --- fs/namespace.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/fs/namespace.c b/fs/namespace.c index 98d27da433047..72f10c40fe3f6 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -1540,8 +1540,13 @@ static int do_umount(struct mount *mnt, int flags) namespace_lock(); lock_mount_hash(); - event++; + /* Recheck MNT_LOCKED with the locks held */ + retval = -EINVAL; + if (mnt->mnt.mnt_flags & MNT_LOCKED) + goto out; + + event++; if (flags & MNT_DETACH) { if (!list_empty(&mnt->mnt_list)) umount_tree(mnt, UMOUNT_PROPAGATE); @@ -1555,6 +1560,7 @@ static int do_umount(struct mount *mnt, int flags) retval = 0; } } +out: unlock_mount_hash(); namespace_unlock(); return retval; @@ -1645,7 +1651,7 @@ int ksys_umount(char __user *name, int flags) goto dput_and_out; if (!check_mnt(mnt)) goto dput_and_out; - if (mnt->mnt.mnt_flags & MNT_LOCKED) + if (mnt->mnt.mnt_flags & MNT_LOCKED) /* Check optimistically */ goto dput_and_out; retval = -EPERM; if (flags & MNT_FORCE && !capable(CAP_SYS_ADMIN)) From df7342b240185d58d3d9665c0bbf0a0f5570ec29 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" <ebiederm@xmission.com> Date: Thu, 25 Oct 2018 09:04:18 -0500 Subject: [PATCH 264/443] mount: Don't allow copying MNT_UNBINDABLE|MNT_LOCKED mounts Jonathan Calmels from NVIDIA reported that he's able to bypass the mount visibility security check in place in the Linux kernel by using a combination of the unbindable property along with the private mount propagation option to allow a unprivileged user to see a path which was purposefully hidden by the root user. Reproducer: # Hide a path to all users using a tmpfs root@castiana:~# mount -t tmpfs tmpfs /sys/devices/ root@castiana:~# # As an unprivileged user, unshare user namespace and mount namespace stgraber@castiana:~$ unshare -U -m -r # Confirm the path is still not accessible root@castiana:~# ls /sys/devices/ # Make /sys recursively unbindable and private root@castiana:~# mount --make-runbindable /sys root@castiana:~# mount --make-private /sys # Recursively bind-mount the rest of /sys over to /mnnt root@castiana:~# mount --rbind /sys/ /mnt # Access our hidden /sys/device as an unprivileged user root@castiana:~# ls /mnt/devices/ breakpoint cpu cstate_core cstate_pkg i915 intel_pt isa kprobe LNXSYSTM:00 msr pci0000:00 platform pnp0 power software system tracepoint uncore_arb uncore_cbox_0 uncore_cbox_1 uprobe virtual Solve this by teaching copy_tree to fail if a mount turns out to be both unbindable and locked. Cc: stable@vger.kernel.org Fixes: 5ff9d8a65ce8 ("vfs: Lock in place mounts from more privileged users") Reported-by: Jonathan Calmels <jcalmels@nvidia.com> Signed-off-by: "Eric W. Biederman" <ebiederm@xmission.com> --- fs/namespace.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/fs/namespace.c b/fs/namespace.c index 72f10c40fe3f6..e0e0f9cf6c30d 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -1734,8 +1734,14 @@ struct mount *copy_tree(struct mount *mnt, struct dentry *dentry, for (s = r; s; s = next_mnt(s, r)) { if (!(flag & CL_COPY_UNBINDABLE) && IS_MNT_UNBINDABLE(s)) { - s = skip_mnt_tree(s); - continue; + if (s->mnt.mnt_flags & MNT_LOCKED) { + /* Both unbindable and locked. */ + q = ERR_PTR(-EPERM); + goto out; + } else { + s = skip_mnt_tree(s); + continue; + } } if (!(flag & CL_COPY_MNT_NS_FILE) && is_mnt_ns_file(s->mnt.mnt_root)) { From e84b47941e15e6666afb8ee8b21d1c3fc1a013af Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C5=82=20Miros=C5=82aw?= <mirq-linux@rere.qmqm.pl> Date: Wed, 7 Nov 2018 17:50:52 +0100 Subject: [PATCH 265/443] ibmvnic: fix accelerated VLAN handling MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Don't request tag insertion when it isn't present in outgoing skb. Signed-off-by: Michał Mirosław <mirq-linux@rere.qmqm.pl> Signed-off-by: David S. Miller <davem@davemloft.net> --- drivers/net/ethernet/ibm/ibmvnic.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/ibm/ibmvnic.c b/drivers/net/ethernet/ibm/ibmvnic.c index 7893beffcc714..c9d5d0a7fbf17 100644 --- a/drivers/net/ethernet/ibm/ibmvnic.c +++ b/drivers/net/ethernet/ibm/ibmvnic.c @@ -1545,7 +1545,7 @@ static netdev_tx_t ibmvnic_xmit(struct sk_buff *skb, struct net_device *netdev) tx_crq.v1.sge_len = cpu_to_be32(skb->len); tx_crq.v1.ioba = cpu_to_be64(data_dma_addr); - if (adapter->vlan_header_insertion) { + if (adapter->vlan_header_insertion && skb_vlan_tag_present(skb)) { tx_crq.v1.flags2 |= IBMVNIC_TX_VLAN_INSERT; tx_crq.v1.vlan_id = cpu_to_be16(skb->vlan_tci); } From b25ddb00bc1b96613edcb525f19203a7d1405fce Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C5=82=20Miros=C5=82aw?= <mirq-linux@rere.qmqm.pl> Date: Wed, 7 Nov 2018 17:50:53 +0100 Subject: [PATCH 266/443] qlcnic: remove assumption that vlan_tci != 0 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit VLAN.TCI == 0 is perfectly valid (802.1p), so allow it to be accelerated. Signed-off-by: Michał Mirosław <mirq-linux@rere.qmqm.pl> Signed-off-by: David S. Miller <davem@davemloft.net> --- drivers/net/ethernet/qlogic/qlcnic/qlcnic_io.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_io.c b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_io.c index 9647578cbe6a8..14f26bf3b388b 100644 --- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_io.c +++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_io.c @@ -459,7 +459,7 @@ static int qlcnic_tx_pkt(struct qlcnic_adapter *adapter, struct cmd_desc_type0 *first_desc, struct sk_buff *skb, struct qlcnic_host_tx_ring *tx_ring) { - u8 l4proto, opcode = 0, hdr_len = 0; + u8 l4proto, opcode = 0, hdr_len = 0, tag_vlan = 0; u16 flags = 0, vlan_tci = 0; int copied, offset, copy_len, size; struct cmd_desc_type0 *hwdesc; @@ -472,14 +472,16 @@ static int qlcnic_tx_pkt(struct qlcnic_adapter *adapter, flags = QLCNIC_FLAGS_VLAN_TAGGED; vlan_tci = ntohs(vh->h_vlan_TCI); protocol = ntohs(vh->h_vlan_encapsulated_proto); + tag_vlan = 1; } else if (skb_vlan_tag_present(skb)) { flags = QLCNIC_FLAGS_VLAN_OOB; vlan_tci = skb_vlan_tag_get(skb); + tag_vlan = 1; } if (unlikely(adapter->tx_pvid)) { - if (vlan_tci && !(adapter->flags & QLCNIC_TAGGING_ENABLED)) + if (tag_vlan && !(adapter->flags & QLCNIC_TAGGING_ENABLED)) return -EIO; - if (vlan_tci && (adapter->flags & QLCNIC_TAGGING_ENABLED)) + if (tag_vlan && (adapter->flags & QLCNIC_TAGGING_ENABLED)) goto set_flags; flags = QLCNIC_FLAGS_VLAN_OOB; From e6a2d72c10405b30ddba5af2e44a9d3d925a56d3 Mon Sep 17 00:00:00 2001 From: Juri Lelli <juri.lelli@redhat.com> Date: Wed, 7 Nov 2018 12:10:32 +0100 Subject: [PATCH 267/443] posix-cpu-timers: Remove useless call to check_dl_overrun() check_dl_overrun() is used to send a SIGXCPU to users that asked to be informed when a SCHED_DEADLINE runtime overruns occur. The function is called by check_thread_timers() already, so the call in check_process_timers() is redundant/wrong (even though harmless). Remove it. Fixes: 34be39305a77 ("sched/deadline: Implement "runtime overrun signal" support") Signed-off-by: Juri Lelli <juri.lelli@redhat.com> Signed-off-by: Thomas Gleixner <tglx@linutronix.de> Reviewed-by: Daniel Bristot de Oliveira <bristot@redhat.com> Reviewed-by: Steven Rostedt (VMware) <rostedt@goodmis.org> Cc: linux-rt-users@vger.kernel.org Cc: mtk.manpages@gmail.com Cc: Mathieu Poirier <mathieu.poirier@linaro.org> Cc: Peter Zijlstra <peterz@infradead.org> Cc: Luca Abeni <luca.abeni@santannapisa.it> Cc: Claudio Scordino <claudio@evidence.eu.com> Link: https://lkml.kernel.org/r/20181107111032.32291-1-juri.lelli@redhat.com --- kernel/time/posix-cpu-timers.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/kernel/time/posix-cpu-timers.c b/kernel/time/posix-cpu-timers.c index ce32cf741b250..8f0644af40be7 100644 --- a/kernel/time/posix-cpu-timers.c +++ b/kernel/time/posix-cpu-timers.c @@ -917,9 +917,6 @@ static void check_process_timers(struct task_struct *tsk, struct task_cputime cputime; unsigned long soft; - if (dl_task(tsk)) - check_dl_overrun(tsk); - /* * If cputimer is not running, then there are no active * process wide timers (POSIX 1.b, itimers, RLIMIT_CPU). From 0bb2ae1b26e1fb7543ec7474cdd374ac4b88c4da Mon Sep 17 00:00:00 2001 From: Thomas Richter <tmricht@linux.ibm.com> Date: Mon, 29 Oct 2018 08:11:33 +0000 Subject: [PATCH 268/443] s390/perf: Change CPUM_CF return code in event init function The function perf_init_event() creates a new event and assignes it to a PMU. This a done in a loop over all existing PMUs. For each listed PMU the event init function is called and if this function does return any other error than -ENOENT, the loop is terminated the creation of the event fails. If the event is invalid, return -ENOENT to try other PMUs. Signed-off-by: Thomas Richter <tmricht@linux.ibm.com> Reviewed-by: Hendrik Brueckner <brueckner@linux.ibm.com> Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com> --- arch/s390/kernel/perf_cpum_cf.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/s390/kernel/perf_cpum_cf.c b/arch/s390/kernel/perf_cpum_cf.c index cc085e2d2ce99..74091fd3101e9 100644 --- a/arch/s390/kernel/perf_cpum_cf.c +++ b/arch/s390/kernel/perf_cpum_cf.c @@ -373,7 +373,7 @@ static int __hw_perf_event_init(struct perf_event *event) return -ENOENT; if (ev > PERF_CPUM_CF_MAX_CTR) - return -EINVAL; + return -ENOENT; /* Obtain the counter set to which the specified counter belongs */ set = get_counter_set(ev); From 9c8e0a1b683525464a2abe9fb4b54404a50ed2b4 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" <ebiederm@xmission.com> Date: Thu, 25 Oct 2018 12:05:11 -0500 Subject: [PATCH 269/443] mount: Prevent MNT_DETACH from disconnecting locked mounts Timothy Baldwin <timbaldwin@fastmail.co.uk> wrote: > As per mount_namespaces(7) unprivileged users should not be able to look under mount points: > > Mounts that come as a single unit from more privileged mount are locked > together and may not be separated in a less privileged mount namespace. > > However they can: > > 1. Create a mount namespace. > 2. In the mount namespace open a file descriptor to the parent of a mount point. > 3. Destroy the mount namespace. > 4. Use the file descriptor to look under the mount point. > > I have reproduced this with Linux 4.16.18 and Linux 4.18-rc8. > > The setup: > > $ sudo sysctl kernel.unprivileged_userns_clone=1 > kernel.unprivileged_userns_clone = 1 > $ mkdir -p A/B/Secret > $ sudo mount -t tmpfs hide A/B > > > "Secret" is indeed hidden as expected: > > $ ls -lR A > A: > total 0 > drwxrwxrwt 2 root root 40 Feb 12 21:08 B > > A/B: > total 0 > > > The attack revealing "Secret": > > $ unshare -Umr sh -c "exec unshare -m ls -lR /proc/self/fd/4/ 4<A" > /proc/self/fd/4/: > total 0 > drwxr-xr-x 3 root root 60 Feb 12 21:08 B > > /proc/self/fd/4/B: > total 0 > drwxr-xr-x 2 root root 40 Feb 12 21:08 Secret > > /proc/self/fd/4/B/Secret: > total 0 I tracked this down to put_mnt_ns running passing UMOUNT_SYNC and disconnecting all of the mounts in a mount namespace. Fix this by factoring drop_mounts out of drop_collected_mounts and passing 0 instead of UMOUNT_SYNC. There are two possible behavior differences that result from this. - No longer setting UMOUNT_SYNC will no longer set MNT_SYNC_UMOUNT on the vfsmounts being unmounted. This effects the lazy rcu walk by kicking the walk out of rcu mode and forcing it to be a non-lazy walk. - No longer disconnecting locked mounts will keep some mounts around longer as they stay because the are locked to other mounts. There are only two users of drop_collected mounts: audit_tree.c and put_mnt_ns. In audit_tree.c the mounts are private and there are no rcu lazy walks only calls to iterate_mounts. So the changes should have no effect except for a small timing effect as the connected mounts are disconnected. In put_mnt_ns there may be references from process outside the mount namespace to the mounts. So the mounts remaining connected will be the bug fix that is needed. That rcu walks are allowed to continue appears not to be a problem especially as the rcu walk change was about an implementation detail not about semantics. Cc: stable@vger.kernel.org Fixes: 5ff9d8a65ce8 ("vfs: Lock in place mounts from more privileged users") Reported-by: Timothy Baldwin <timbaldwin@fastmail.co.uk> Tested-by: Timothy Baldwin <timbaldwin@fastmail.co.uk> Signed-off-by: "Eric W. Biederman" <ebiederm@xmission.com> --- fs/namespace.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/namespace.c b/fs/namespace.c index e0e0f9cf6c30d..74f64294a4108 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -1794,7 +1794,7 @@ void drop_collected_mounts(struct vfsmount *mnt) { namespace_lock(); lock_mount_hash(); - umount_tree(real_mount(mnt), UMOUNT_SYNC); + umount_tree(real_mount(mnt), 0); unlock_mount_hash(); namespace_unlock(); } From 24efee412c75843755da0ddd7bbc2db2ce9129f5 Mon Sep 17 00:00:00 2001 From: Miguel Ojeda <miguel.ojeda.sandonis@gmail.com> Date: Tue, 6 Nov 2018 22:52:11 +0100 Subject: [PATCH 270/443] Compiler Attributes: improve explanation of header Explain better what "optional" attributes are, and avoid calling them so to avoid confusion. Simply retain "Optional" as a word to look for in the comments. Moreover, add a couple sentences to explain a bit more the intention and the documentation links. Signed-off-by: Miguel Ojeda <miguel.ojeda.sandonis@gmail.com> --- include/linux/compiler_attributes.h | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/include/linux/compiler_attributes.h b/include/linux/compiler_attributes.h index 6b28c1b7310c0..f8c400ba1929e 100644 --- a/include/linux/compiler_attributes.h +++ b/include/linux/compiler_attributes.h @@ -4,22 +4,26 @@ /* * The attributes in this file are unconditionally defined and they directly - * map to compiler attribute(s) -- except those that are optional. + * map to compiler attribute(s), unless one of the compilers does not support + * the attribute. In that case, __has_attribute is used to check for support + * and the reason is stated in its comment ("Optional: ..."). * * Any other "attributes" (i.e. those that depend on a configuration option, * on a compiler, on an architecture, on plugins, on other attributes...) * should be defined elsewhere (e.g. compiler_types.h or compiler-*.h). + * The intention is to keep this file as simple as possible, as well as + * compiler- and version-agnostic (e.g. avoiding GCC_VERSION checks). * * This file is meant to be sorted (by actual attribute name, * not by #define identifier). Use the __attribute__((__name__)) syntax * (i.e. with underscores) to avoid future collisions with other macros. - * If an attribute is optional, state the reason in the comment. + * Provide links to the documentation of each supported compiler, if it exists. */ /* - * To check for optional attributes, we use __has_attribute, which is supported - * on gcc >= 5, clang >= 2.9 and icc >= 17. In the meantime, to support - * 4.6 <= gcc < 5, we implement __has_attribute by hand. + * __has_attribute is supported on gcc >= 5, clang >= 2.9 and icc >= 17. + * In the meantime, to support 4.6 <= gcc < 5, we implement __has_attribute + * by hand. * * sparse does not support __has_attribute (yet) and defines __GNUC_MINOR__ * depending on the compiler used to build it; however, these attributes have From 943210ba807ec50aafa2fa7b13bd6d36a478969b Mon Sep 17 00:00:00 2001 From: Mikulas Patocka <mpatocka@redhat.com> Date: Tue, 23 Oct 2018 11:28:28 -0400 Subject: [PATCH 271/443] vt: fix broken display when running aptitude If you run aptitude on framebuffer console, the display is corrupted. The corruption is caused by the commit d8ae7242. The patch adds "offset" to "start" when calling scr_memsetw, but it forgets to do the same addition on a subsequent call to do_update_region. Signed-off-by: Mikulas Patocka <mpatocka@redhat.com> Fixes: d8ae72427187 ("vt: preserve unicode values corresponding to screen characters") Reviewed-by: Nicolas Pitre <nico@linaro.org> Cc: stable@vger.kernel.org # 4.19 Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> --- drivers/tty/vt/vt.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/tty/vt/vt.c b/drivers/tty/vt/vt.c index 5f1183b0b89df..476ec4b1b86cd 100644 --- a/drivers/tty/vt/vt.c +++ b/drivers/tty/vt/vt.c @@ -1551,7 +1551,7 @@ static void csi_K(struct vc_data *vc, int vpar) scr_memsetw(start + offset, vc->vc_video_erase_char, 2 * count); vc->vc_need_wrap = 0; if (con_should_update(vc)) - do_update_region(vc, (unsigned long) start, count); + do_update_region(vc, (unsigned long)(start + offset), count); } static void csi_X(struct vc_data *vc, int vpar) /* erase the following vpar positions */ From 991a25194097006ec1e0d2e0814ff920e59e3465 Mon Sep 17 00:00:00 2001 From: "H. Peter Anvin" <hpa@zytor.com> Date: Mon, 22 Oct 2018 09:19:04 -0700 Subject: [PATCH 272/443] termios, tty/tty_baudrate.c: fix buffer overrun On architectures with CBAUDEX == 0 (Alpha and PowerPC), the code in tty_baudrate.c does not do any limit checking on the tty_baudrate[] array, and in fact a buffer overrun is possible on both architectures. Add a limit check to prevent that situation. This will be followed by a much bigger cleanup/simplification patch. Signed-off-by: H. Peter Anvin (Intel) <hpa@zytor.com> Requested-by: Cc: Johan Hovold <johan@kernel.org> Cc: Jiri Slaby <jslaby@suse.com> Cc: Al Viro <viro@zeniv.linux.org.uk> Cc: Richard Henderson <rth@twiddle.net> Cc: Ivan Kokshaysky <ink@jurassic.park.msu.ru> Cc: Matt Turner <mattst88@gmail.com> Cc: Thomas Gleixner <tglx@linutronix.de> Cc: Kate Stewart <kstewart@linuxfoundation.org> Cc: Philippe Ombredanne <pombredanne@nexb.com> Cc: Eugene Syromiatnikov <esyr@redhat.com> Cc: Alan Cox <alan@lxorguk.ukuu.org.uk> Cc: stable <stable@vger.kernel.org> Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> --- drivers/tty/tty_baudrate.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/tty/tty_baudrate.c b/drivers/tty/tty_baudrate.c index 7576ceace5715..f438eaa682463 100644 --- a/drivers/tty/tty_baudrate.c +++ b/drivers/tty/tty_baudrate.c @@ -77,7 +77,7 @@ speed_t tty_termios_baud_rate(struct ktermios *termios) else cbaud += 15; } - return baud_table[cbaud]; + return cbaud >= n_baud_table ? 0 : baud_table[cbaud]; } EXPORT_SYMBOL(tty_termios_baud_rate); @@ -113,7 +113,7 @@ speed_t tty_termios_input_baud_rate(struct ktermios *termios) else cbaud += 15; } - return baud_table[cbaud]; + return cbaud >= n_baud_table ? 0 : baud_table[cbaud]; #else /* IBSHIFT */ return tty_termios_baud_rate(termios); #endif /* IBSHIFT */ From d0ffb805b729322626639336986bc83fc2e60871 Mon Sep 17 00:00:00 2001 From: "H. Peter Anvin (Intel)" <hpa@zytor.com> Date: Mon, 22 Oct 2018 09:19:05 -0700 Subject: [PATCH 273/443] arch/alpha, termios: implement BOTHER, IBSHIFT and termios2 Alpha has had c_ispeed and c_ospeed, but still set speeds in c_cflags using arbitrary flags. Because BOTHER is not defined, the general Linux code doesn't allow setting arbitrary baud rates, and because CBAUDEX == 0, we can have an array overrun of the baud_rate[] table in drivers/tty/tty_baudrate.c if (c_cflags & CBAUD) == 037. Resolve both problems by #defining BOTHER to 037 on Alpha. However, userspace still needs to know if setting BOTHER is actually safe given legacy kernels (does anyone actually care about that on Alpha anymore?), so enable the TCGETS2/TCSETS*2 ioctls on Alpha, even though they use the same structure. Define struct termios2 just for compatibility; it is the exact same structure as struct termios. In a future patchset, this will be cleaned up so the uapi headers are usable from libc. Signed-off-by: H. Peter Anvin (Intel) <hpa@zytor.com> Cc: Jiri Slaby <jslaby@suse.com> Cc: Al Viro <viro@zeniv.linux.org.uk> Cc: Richard Henderson <rth@twiddle.net> Cc: Ivan Kokshaysky <ink@jurassic.park.msu.ru> Cc: Matt Turner <mattst88@gmail.com> Cc: Thomas Gleixner <tglx@linutronix.de> Cc: Kate Stewart <kstewart@linuxfoundation.org> Cc: Philippe Ombredanne <pombredanne@nexb.com> Cc: Eugene Syromiatnikov <esyr@redhat.com> Cc: <linux-alpha@vger.kernel.org> Cc: <linux-serial@vger.kernel.org> Cc: Johan Hovold <johan@kernel.org> Cc: Alan Cox <alan@lxorguk.ukuu.org.uk> Cc: <stable@vger.kernel.org> Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> --- arch/alpha/include/asm/termios.h | 8 +++++++- arch/alpha/include/uapi/asm/ioctls.h | 5 +++++ arch/alpha/include/uapi/asm/termbits.h | 17 +++++++++++++++++ 3 files changed, 29 insertions(+), 1 deletion(-) diff --git a/arch/alpha/include/asm/termios.h b/arch/alpha/include/asm/termios.h index 6a8c53dec57e6..b7c77bb1bfd20 100644 --- a/arch/alpha/include/asm/termios.h +++ b/arch/alpha/include/asm/termios.h @@ -73,9 +73,15 @@ }) #define user_termios_to_kernel_termios(k, u) \ - copy_from_user(k, u, sizeof(struct termios)) + copy_from_user(k, u, sizeof(struct termios2)) #define kernel_termios_to_user_termios(u, k) \ + copy_to_user(u, k, sizeof(struct termios2)) + +#define user_termios_to_kernel_termios_1(k, u) \ + copy_from_user(k, u, sizeof(struct termios)) + +#define kernel_termios_to_user_termios_1(u, k) \ copy_to_user(u, k, sizeof(struct termios)) #endif /* _ALPHA_TERMIOS_H */ diff --git a/arch/alpha/include/uapi/asm/ioctls.h b/arch/alpha/include/uapi/asm/ioctls.h index 3729d92d3fa85..dc8c20ac7191f 100644 --- a/arch/alpha/include/uapi/asm/ioctls.h +++ b/arch/alpha/include/uapi/asm/ioctls.h @@ -32,6 +32,11 @@ #define TCXONC _IO('t', 30) #define TCFLSH _IO('t', 31) +#define TCGETS2 _IOR('T', 42, struct termios2) +#define TCSETS2 _IOW('T', 43, struct termios2) +#define TCSETSW2 _IOW('T', 44, struct termios2) +#define TCSETSF2 _IOW('T', 45, struct termios2) + #define TIOCSWINSZ _IOW('t', 103, struct winsize) #define TIOCGWINSZ _IOR('t', 104, struct winsize) #define TIOCSTART _IO('t', 110) /* start output, like ^Q */ diff --git a/arch/alpha/include/uapi/asm/termbits.h b/arch/alpha/include/uapi/asm/termbits.h index de6c8360fbe36..4575ba34a0eae 100644 --- a/arch/alpha/include/uapi/asm/termbits.h +++ b/arch/alpha/include/uapi/asm/termbits.h @@ -26,6 +26,19 @@ struct termios { speed_t c_ospeed; /* output speed */ }; +/* Alpha has identical termios and termios2 */ + +struct termios2 { + tcflag_t c_iflag; /* input mode flags */ + tcflag_t c_oflag; /* output mode flags */ + tcflag_t c_cflag; /* control mode flags */ + tcflag_t c_lflag; /* local mode flags */ + cc_t c_cc[NCCS]; /* control characters */ + cc_t c_line; /* line discipline (== c_cc[19]) */ + speed_t c_ispeed; /* input speed */ + speed_t c_ospeed; /* output speed */ +}; + /* Alpha has matching termios and ktermios */ struct ktermios { @@ -152,6 +165,7 @@ struct ktermios { #define B3000000 00034 #define B3500000 00035 #define B4000000 00036 +#define BOTHER 00037 #define CSIZE 00001400 #define CS5 00000000 @@ -169,6 +183,9 @@ struct ktermios { #define CMSPAR 010000000000 /* mark or space (stick) parity */ #define CRTSCTS 020000000000 /* flow control */ +#define CIBAUD 07600000 +#define IBSHIFT 16 + /* c_lflag bits */ #define ISIG 0x00000080 #define ICANON 0x00000100 From 0033dfd92a5646a78025e86f8df4d5b18181ba2c Mon Sep 17 00:00:00 2001 From: Anton Ivanov <anton.ivanov@cambridgegreys.com> Date: Thu, 8 Nov 2018 13:07:23 +0000 Subject: [PATCH 274/443] ubd: fix missing initialization of io_req The SYNC path doesn't initialize io_req->error, which can cause random errors. Before the conversion to blk-mq, we always completed requests with BLK_STS_OK status, but now we actually look at the error field and this issue becomes apparent. Signed-off-by: Anton Ivanov <anton.ivanov@cambridgegreys.com> [axboe: fixed up commit message to explain what is actually going on] Signed-off-by: Jens Axboe <axboe@kernel.dk> --- arch/um/drivers/ubd_kern.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/arch/um/drivers/ubd_kern.c b/arch/um/drivers/ubd_kern.c index 08831f5d83db0..28c40624bcb6f 100644 --- a/arch/um/drivers/ubd_kern.c +++ b/arch/um/drivers/ubd_kern.c @@ -1305,6 +1305,7 @@ static int ubd_queue_one_vec(struct blk_mq_hw_ctx *hctx, struct request *req, io_req->fds[0] = dev->cow.fd; else io_req->fds[0] = dev->fd; + io_req->error = 0; if (req_op(req) == REQ_OP_FLUSH) { io_req->op = UBD_FLUSH; @@ -1313,9 +1314,7 @@ static int ubd_queue_one_vec(struct blk_mq_hw_ctx *hctx, struct request *req, io_req->cow_offset = -1; io_req->offset = off; io_req->length = bvec->bv_len; - io_req->error = 0; io_req->sector_mask = 0; - io_req->op = rq_data_dir(req) == READ ? UBD_READ : UBD_WRITE; io_req->offsets[0] = 0; io_req->offsets[1] = dev->cow.data_offset; From bb39ba6a8deab70752b836a36c62205b1c65b559 Mon Sep 17 00:00:00 2001 From: Kuninori Morimoto <kuninori.morimoto.gx@renesas.com> Date: Thu, 8 Nov 2018 06:34:34 +0000 Subject: [PATCH 275/443] sata_rcar: convert to SPDX identifiers This patch updates license to use SPDX-License-Identifier instead of verbose license text. Signed-off-by: Kuninori Morimoto <kuninori.morimoto.gx@renesas.com> Signed-off-by: Jens Axboe <axboe@kernel.dk> --- drivers/ata/sata_rcar.c | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/drivers/ata/sata_rcar.c b/drivers/ata/sata_rcar.c index 10ecb232245db..4b1ff5bc256a3 100644 --- a/drivers/ata/sata_rcar.c +++ b/drivers/ata/sata_rcar.c @@ -1,14 +1,10 @@ +// SPDX-License-Identifier: GPL-2.0+ /* * Renesas R-Car SATA driver * * Author: Vladimir Barinov <source@cogentembedded.com> * Copyright (C) 2013-2015 Cogent Embedded, Inc. * Copyright (C) 2013-2015 Renesas Solutions Corp. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License as published by the - * Free Software Foundation; either version 2 of the License, or (at your - * option) any later version. */ #include <linux/kernel.h> From b469e7e47c8a075cc08bcd1e85d4365134bdcdd5 Mon Sep 17 00:00:00 2001 From: Amir Goldstein <amir73il@gmail.com> Date: Tue, 30 Oct 2018 20:29:53 +0200 Subject: [PATCH 276/443] fanotify: fix handling of events on child sub-directory When an event is reported on a sub-directory and the parent inode has a mark mask with FS_EVENT_ON_CHILD|FS_ISDIR, the event will be sent to fsnotify() even if the event type is not in the parent mark mask (e.g. FS_OPEN). Further more, if that event happened on a mount or a filesystem with a mount/sb mark that does have that event type in their mask, the "on child" event will be reported on the mount/sb mark. That is not desired, because user will get a duplicate event for the same action. Note that the event reported on the victim inode is never merged with the event reported on the parent inode, because of the check in should_merge(): old_fsn->inode == new_fsn->inode. Fix this by looking for a match of an actual event type (i.e. not just FS_ISDIR) in parent's inode mark mask and by not reporting an "on child" event to group if event type is only found on mount/sb marks. [backport hint: The bug seems to have always been in fanotify, but this patch will only apply cleanly to v4.19.y] Cc: <stable@vger.kernel.org> # v4.19 Signed-off-by: Amir Goldstein <amir73il@gmail.com> Signed-off-by: Jan Kara <jack@suse.cz> --- fs/notify/fanotify/fanotify.c | 10 +++++----- fs/notify/fsnotify.c | 7 +++++-- 2 files changed, 10 insertions(+), 7 deletions(-) diff --git a/fs/notify/fanotify/fanotify.c b/fs/notify/fanotify/fanotify.c index 5769cf3ff035a..e08a6647267b1 100644 --- a/fs/notify/fanotify/fanotify.c +++ b/fs/notify/fanotify/fanotify.c @@ -115,12 +115,12 @@ static bool fanotify_should_send_event(struct fsnotify_iter_info *iter_info, continue; mark = iter_info->marks[type]; /* - * if the event is for a child and this inode doesn't care about - * events on the child, don't send it! + * If the event is for a child and this mark doesn't care about + * events on a child, don't send it! */ - if (type == FSNOTIFY_OBJ_TYPE_INODE && - (event_mask & FS_EVENT_ON_CHILD) && - !(mark->mask & FS_EVENT_ON_CHILD)) + if (event_mask & FS_EVENT_ON_CHILD && + (type != FSNOTIFY_OBJ_TYPE_INODE || + !(mark->mask & FS_EVENT_ON_CHILD))) continue; marks_mask |= mark->mask; diff --git a/fs/notify/fsnotify.c b/fs/notify/fsnotify.c index 2172ba516c61d..d2c34900ae05d 100644 --- a/fs/notify/fsnotify.c +++ b/fs/notify/fsnotify.c @@ -167,9 +167,9 @@ int __fsnotify_parent(const struct path *path, struct dentry *dentry, __u32 mask parent = dget_parent(dentry); p_inode = parent->d_inode; - if (unlikely(!fsnotify_inode_watches_children(p_inode))) + if (unlikely(!fsnotify_inode_watches_children(p_inode))) { __fsnotify_update_child_dentry_flags(p_inode); - else if (p_inode->i_fsnotify_mask & mask) { + } else if (p_inode->i_fsnotify_mask & mask & ALL_FSNOTIFY_EVENTS) { struct name_snapshot name; /* we are notifying a parent so come up with the new mask which @@ -339,6 +339,9 @@ int fsnotify(struct inode *to_tell, __u32 mask, const void *data, int data_is, sb = mnt->mnt.mnt_sb; mnt_or_sb_mask = mnt->mnt_fsnotify_mask | sb->s_fsnotify_mask; } + /* An event "on child" is not intended for a mount/sb mark */ + if (mask & FS_EVENT_ON_CHILD) + mnt_or_sb_mask = 0; /* * Optimization: srcu_read_lock() has a memory barrier which can From c2c6d3ce0d9a1fae4472fc10755372d022a487a4 Mon Sep 17 00:00:00 2001 From: Luis Henriques <lhenriques@suse.com> Date: Tue, 23 Oct 2018 16:53:14 +0000 Subject: [PATCH 277/443] ceph: add destination file data sync before doing any remote copy MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit If we try to copy into a file that was just written, any data that is remote copied will be overwritten by our buffered writes once they are flushed. When this happens, the call to invalidate_inode_pages2_range will also return a -EBUSY error. This patch fixes this by also sync'ing the destination file before starting any copy. Fixes: 503f82a9932d ("ceph: support copy_file_range file operation") Signed-off-by: Luis Henriques <lhenriques@suse.com> Reviewed-by: "Yan, Zheng" <zyan@redhat.com> Signed-off-by: Ilya Dryomov <idryomov@gmail.com> --- fs/ceph/file.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/fs/ceph/file.c b/fs/ceph/file.c index 27cad84dab23a..189df668b6a0c 100644 --- a/fs/ceph/file.c +++ b/fs/ceph/file.c @@ -1931,10 +1931,17 @@ static ssize_t ceph_copy_file_range(struct file *src_file, loff_t src_off, if (!prealloc_cf) return -ENOMEM; - /* Start by sync'ing the source file */ + /* Start by sync'ing the source and destination files */ ret = file_write_and_wait_range(src_file, src_off, (src_off + len)); - if (ret < 0) + if (ret < 0) { + dout("failed to write src file (%zd)\n", ret); + goto out; + } + ret = file_write_and_wait_range(dst_file, dst_off, (dst_off + len)); + if (ret < 0) { + dout("failed to write dst file (%zd)\n", ret); goto out; + } /* * We need FILE_WR caps for dst_ci and FILE_RD for src_ci as other From 71f2cc64d027d712f29bf8d09d3e123302d5f245 Mon Sep 17 00:00:00 2001 From: Luis Henriques <lhenriques@suse.com> Date: Mon, 5 Nov 2018 19:00:52 +0000 Subject: [PATCH 278/443] ceph: quota: fix null pointer dereference in quota check MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This patch fixes a possible null pointer dereference in check_quota_exceeded, detected by the static checker smatch, with the following warning: fs/ceph/quota.c:240 check_quota_exceeded() error: we previously assumed 'realm' could be null (see line 188) Fixes: b7a2921765cf ("ceph: quota: support for ceph.quota.max_files") Reported-by: Dan Carpenter <dan.carpenter@oracle.com> Signed-off-by: Luis Henriques <lhenriques@suse.com> Reviewed-by: "Yan, Zheng" <zyan@redhat.com> Signed-off-by: Ilya Dryomov <idryomov@gmail.com> --- fs/ceph/quota.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/ceph/quota.c b/fs/ceph/quota.c index 32d4f13784ba5..03f4d24db8fe0 100644 --- a/fs/ceph/quota.c +++ b/fs/ceph/quota.c @@ -237,7 +237,8 @@ static bool check_quota_exceeded(struct inode *inode, enum quota_check_op op, ceph_put_snap_realm(mdsc, realm); realm = next; } - ceph_put_snap_realm(mdsc, realm); + if (realm) + ceph_put_snap_realm(mdsc, realm); up_read(&mdsc->snap_rwsem); return exceeded; From 23c625ce3065e40c933a4239efb9b11f1194a343 Mon Sep 17 00:00:00 2001 From: Ilya Dryomov <idryomov@gmail.com> Date: Thu, 8 Nov 2018 14:55:21 +0100 Subject: [PATCH 279/443] libceph: assume argonaut on the server side No one is running pre-argonaut. In addition one of the argonaut features (NOSRCADDR) has been required since day one (and a half, 2.6.34 vs 2.6.35) of the kernel client. Allow for the possibility of reusing these feature bits later. Signed-off-by: Ilya Dryomov <idryomov@gmail.com> Reviewed-by: Sage Weil <sage@redhat.com> --- fs/ceph/mds_client.c | 12 +++--------- include/linux/ceph/ceph_features.h | 8 +------- 2 files changed, 4 insertions(+), 16 deletions(-) diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c index 67a9aeb2f4ecd..bd13a3267ae03 100644 --- a/fs/ceph/mds_client.c +++ b/fs/ceph/mds_client.c @@ -80,12 +80,8 @@ static int parse_reply_info_in(void **p, void *end, info->symlink = *p; *p += info->symlink_len; - if (features & CEPH_FEATURE_DIRLAYOUTHASH) - ceph_decode_copy_safe(p, end, &info->dir_layout, - sizeof(info->dir_layout), bad); - else - memset(&info->dir_layout, 0, sizeof(info->dir_layout)); - + ceph_decode_copy_safe(p, end, &info->dir_layout, + sizeof(info->dir_layout), bad); ceph_decode_32_safe(p, end, info->xattr_len, bad); ceph_decode_need(p, end, info->xattr_len, bad); info->xattr_data = *p; @@ -3182,10 +3178,8 @@ static void send_mds_reconnect(struct ceph_mds_client *mdsc, recon_state.pagelist = pagelist; if (session->s_con.peer_features & CEPH_FEATURE_MDSENC) recon_state.msg_version = 3; - else if (session->s_con.peer_features & CEPH_FEATURE_FLOCK) - recon_state.msg_version = 2; else - recon_state.msg_version = 1; + recon_state.msg_version = 2; err = iterate_session_caps(session, encode_caps_cb, &recon_state); if (err < 0) goto fail; diff --git a/include/linux/ceph/ceph_features.h b/include/linux/ceph/ceph_features.h index 6b92b3395fa99..65a38c4a02a18 100644 --- a/include/linux/ceph/ceph_features.h +++ b/include/linux/ceph/ceph_features.h @@ -213,12 +213,6 @@ DEFINE_CEPH_FEATURE_DEPRECATED(63, 1, RESERVED_BROKEN, LUMINOUS) // client-facin CEPH_FEATURE_NEW_OSDOPREPLY_ENCODING | \ CEPH_FEATURE_CEPHX_V2) -#define CEPH_FEATURES_REQUIRED_DEFAULT \ - (CEPH_FEATURE_NOSRCADDR | \ - CEPH_FEATURE_SUBSCRIBE2 | \ - CEPH_FEATURE_RECONNECT_SEQ | \ - CEPH_FEATURE_PGID64 | \ - CEPH_FEATURE_PGPOOL3 | \ - CEPH_FEATURE_OSDENC) +#define CEPH_FEATURES_REQUIRED_DEFAULT 0 #endif From 01310bb7c9c98752cc763b36532fab028e0f8f81 Mon Sep 17 00:00:00 2001 From: Scott Mayhew <smayhew@redhat.com> Date: Thu, 8 Nov 2018 11:11:36 -0500 Subject: [PATCH 280/443] nfsd: COPY and CLONE operations require the saved filehandle to be set Make sure we have a saved filehandle, otherwise we'll oops with a null pointer dereference in nfs4_preprocess_stateid_op(). Signed-off-by: Scott Mayhew <smayhew@redhat.com> Cc: stable@vger.kernel.org Signed-off-by: J. Bruce Fields <bfields@redhat.com> --- fs/nfsd/nfs4proc.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c index edff074d38c75..d505990dac7c9 100644 --- a/fs/nfsd/nfs4proc.c +++ b/fs/nfsd/nfs4proc.c @@ -1038,6 +1038,9 @@ nfsd4_verify_copy(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, { __be32 status; + if (!cstate->save_fh.fh_dentry) + return nfserr_nofilehandle; + status = nfs4_preprocess_stateid_op(rqstp, cstate, &cstate->save_fh, src_stateid, RD_STATE, src, NULL); if (status) { From 025911a5f4e36955498ed50806ad1b02f0f76288 Mon Sep 17 00:00:00 2001 From: YueHaibing <yuehaibing@huawei.com> Date: Thu, 8 Nov 2018 02:04:57 +0000 Subject: [PATCH 281/443] SUNRPC: drop pointless static qualifier in xdr_get_next_encode_buffer() There is no need to have the '__be32 *p' variable static since new value always be assigned before use it. Signed-off-by: YueHaibing <yuehaibing@huawei.com> Cc: stable@vger.kernel.org Signed-off-by: J. Bruce Fields <bfields@redhat.com> --- net/sunrpc/xdr.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/sunrpc/xdr.c b/net/sunrpc/xdr.c index 5cfb9e0a18dcb..f302c6eb87790 100644 --- a/net/sunrpc/xdr.c +++ b/net/sunrpc/xdr.c @@ -546,7 +546,7 @@ EXPORT_SYMBOL_GPL(xdr_commit_encode); static __be32 *xdr_get_next_encode_buffer(struct xdr_stream *xdr, size_t nbytes) { - static __be32 *p; + __be32 *p; int space_left; int frag1bytes, frag2bytes; From 017ce359a7187192df5222a00fa3c9055eb3736d Mon Sep 17 00:00:00 2001 From: Arnd Bergmann <arnd@arndb.de> Date: Fri, 2 Nov 2018 12:06:43 +0100 Subject: [PATCH 282/443] ACPI / PMIC: xpower: fix IOSF_MBI dependency We still get a link failure with IOSF_MBI=m when the xpower driver is built-in: drivers/acpi/pmic/intel_pmic_xpower.o: In function `intel_xpower_pmic_update_power': intel_pmic_xpower.c:(.text+0x4f2): undefined reference to `iosf_mbi_block_punit_i2c_access' intel_pmic_xpower.c:(.text+0x5e2): undefined reference to `iosf_mbi_unblock_punit_i2c_access' This makes the dependency stronger, so we can only build when IOSF_MBI is built-in. Fixes: 6a9b593d4b6f (ACPI / PMIC: xpower: Add depends on IOSF_MBI to Kconfig entry) Signed-off-by: Arnd Bergmann <arnd@arndb.de> Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com> --- drivers/acpi/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/acpi/Kconfig b/drivers/acpi/Kconfig index 8f3a444c6ea92..7cea769c37df5 100644 --- a/drivers/acpi/Kconfig +++ b/drivers/acpi/Kconfig @@ -512,7 +512,7 @@ config CRC_PMIC_OPREGION config XPOWER_PMIC_OPREGION bool "ACPI operation region support for XPower AXP288 PMIC" - depends on MFD_AXP20X_I2C && IOSF_MBI + depends on MFD_AXP20X_I2C && IOSF_MBI=y help This config adds ACPI operation region support for XPower AXP288 PMIC. From aeaf6a4b2d9ed4373e39a64c1c10cb1d93dd6bd1 Mon Sep 17 00:00:00 2001 From: Sudeep Holla <sudeep.holla@arm.com> Date: Wed, 7 Nov 2018 17:40:58 +0000 Subject: [PATCH 283/443] dt-bindings: cpufreq: remove stale arm_big_little_dt entry Most of the ARM platforms used v2 OPP bindings to support big-little configurations. This arm_big_little_dt binding is incomplete and was never used. Commit f174e49e4906 (cpufreq: remove unused arm_big_little_dt driver) removed the driver supporting this binding, but the binding was left unnoticed, so let's get rid of it now. Fixes: f174e49e4906 (cpufreq: remove unused arm_big_little_dt driver) Signed-off-by: Sudeep Holla <sudeep.holla@arm.com> Acked-by: Viresh Kumar <viresh.kumar@linaro.org> Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com> --- .../bindings/cpufreq/arm_big_little_dt.txt | 65 ------------------- 1 file changed, 65 deletions(-) delete mode 100644 Documentation/devicetree/bindings/cpufreq/arm_big_little_dt.txt diff --git a/Documentation/devicetree/bindings/cpufreq/arm_big_little_dt.txt b/Documentation/devicetree/bindings/cpufreq/arm_big_little_dt.txt deleted file mode 100644 index 2aa06ac0fac59..0000000000000 --- a/Documentation/devicetree/bindings/cpufreq/arm_big_little_dt.txt +++ /dev/null @@ -1,65 +0,0 @@ -Generic ARM big LITTLE cpufreq driver's DT glue ------------------------------------------------ - -This is DT specific glue layer for generic cpufreq driver for big LITTLE -systems. - -Both required and optional properties listed below must be defined -under node /cpus/cpu@x. Where x is the first cpu inside a cluster. - -FIXME: Cpus should boot in the order specified in DT and all cpus for a cluster -must be present contiguously. Generic DT driver will check only node 'x' for -cpu:x. - -Required properties: -- operating-points: Refer to Documentation/devicetree/bindings/opp/opp.txt - for details - -Optional properties: -- clock-latency: Specify the possible maximum transition latency for clock, - in unit of nanoseconds. - -Examples: - -cpus { - #address-cells = <1>; - #size-cells = <0>; - - cpu@0 { - compatible = "arm,cortex-a15"; - reg = <0>; - next-level-cache = <&L2>; - operating-points = < - /* kHz uV */ - 792000 1100000 - 396000 950000 - 198000 850000 - >; - clock-latency = <61036>; /* two CLK32 periods */ - }; - - cpu@1 { - compatible = "arm,cortex-a15"; - reg = <1>; - next-level-cache = <&L2>; - }; - - cpu@100 { - compatible = "arm,cortex-a7"; - reg = <100>; - next-level-cache = <&L2>; - operating-points = < - /* kHz uV */ - 792000 950000 - 396000 750000 - 198000 450000 - >; - clock-latency = <61036>; /* two CLK32 periods */ - }; - - cpu@101 { - compatible = "arm,cortex-a7"; - reg = <101>; - next-level-cache = <&L2>; - }; -}; From 26a4676faa1ad5d99317e0cd701e5d6f3e716b77 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel <ard.biesheuvel@linaro.org> Date: Wed, 7 Nov 2018 18:10:38 +0100 Subject: [PATCH 284/443] arm64: mm: define NET_IP_ALIGN to 0 On arm64, there is no need to add 2 bytes of padding to the start of each network buffer just to make the IP header appear 32-bit aligned. Since this might actually adversely affect DMA performance some platforms, let's override NET_IP_ALIGN to 0 to get rid of this padding. Acked-by: Ilias Apalodimas <ilias.apalodimas@linaro.org> Tested-by: Ilias Apalodimas <ilias.apalodimas@linaro.org> Acked-by: Mark Rutland <mark.rutland@arm.com> Acked-by: Will Deacon <will.deacon@arm.com> Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org> Signed-off-by: Catalin Marinas <catalin.marinas@arm.com> --- arch/arm64/include/asm/processor.h | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/arch/arm64/include/asm/processor.h b/arch/arm64/include/asm/processor.h index 3e2091708b8e5..6b0d4dff50125 100644 --- a/arch/arm64/include/asm/processor.h +++ b/arch/arm64/include/asm/processor.h @@ -24,6 +24,14 @@ #define KERNEL_DS UL(-1) #define USER_DS (TASK_SIZE_64 - 1) +/* + * On arm64 systems, unaligned accesses by the CPU are cheap, and so there is + * no point in shifting all network buffers by 2 bytes just to make some IP + * header fields appear aligned in memory, potentially sacrificing some DMA + * performance on some platforms. + */ +#define NET_IP_ALIGN 0 + #ifndef __ASSEMBLY__ #ifdef __KERNEL__ From 763f191af51f127cf8e69cd361f50bf6180768a5 Mon Sep 17 00:00:00 2001 From: Ulf Hansson <ulf.hansson@linaro.org> Date: Thu, 1 Nov 2018 13:22:38 +0100 Subject: [PATCH 285/443] ARM: cpuidle: Don't register the driver when back-end init returns -ENXIO There's no point to register the cpuidle driver for the current CPU, when the initialization of the arch specific back-end data fails by returning -ENXIO. Instead, let's re-order the sequence to its original flow, by first trying to initialize the back-end part and then act accordingly on the returned error code. Additionally, let's print the error message, no matter of what error code that was returned. Fixes: a0d46a3dfdc3 (ARM: cpuidle: Register per cpuidle device) Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org> Reviewed-by: Daniel Lezcano <daniel.lezcano@linaro.org> Cc: 4.19+ <stable@vger.kernel.org> # v4.19+ Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com> --- drivers/cpuidle/cpuidle-arm.c | 22 ++++++++++------------ 1 file changed, 10 insertions(+), 12 deletions(-) diff --git a/drivers/cpuidle/cpuidle-arm.c b/drivers/cpuidle/cpuidle-arm.c index 073557f433eb1..df564d7832161 100644 --- a/drivers/cpuidle/cpuidle-arm.c +++ b/drivers/cpuidle/cpuidle-arm.c @@ -103,13 +103,6 @@ static int __init arm_idle_init_cpu(int cpu) goto out_kfree_drv; } - ret = cpuidle_register_driver(drv); - if (ret) { - if (ret != -EBUSY) - pr_err("Failed to register cpuidle driver\n"); - goto out_kfree_drv; - } - /* * Call arch CPU operations in order to initialize * idle states suspend back-end specific data @@ -117,15 +110,20 @@ static int __init arm_idle_init_cpu(int cpu) ret = arm_cpuidle_init(cpu); /* - * Skip the cpuidle device initialization if the reported + * Allow the initialization to continue for other CPUs, if the reported * failure is a HW misconfiguration/breakage (-ENXIO). */ - if (ret == -ENXIO) - return 0; - if (ret) { pr_err("CPU %d failed to init idle CPU ops\n", cpu); - goto out_unregister_drv; + ret = ret == -ENXIO ? 0 : ret; + goto out_kfree_drv; + } + + ret = cpuidle_register_driver(drv); + if (ret) { + if (ret != -EBUSY) + pr_err("Failed to register cpuidle driver\n"); + goto out_kfree_drv; } dev = kzalloc(sizeof(*dev), GFP_KERNEL); From 3e452e636d006fa759a9914c044398869acba98f Mon Sep 17 00:00:00 2001 From: Ulf Hansson <ulf.hansson@linaro.org> Date: Thu, 1 Nov 2018 11:15:58 +0100 Subject: [PATCH 286/443] ARM: cpuidle: Convert to use cpuidle_register|unregister() The only reason that remains, to why the ARM cpuidle driver calls cpuidle_register_driver(), is to avoid printing an error message in case another driver already have been registered for the CPU. This seems a bit silly, but more importantly, if that is a common scenario, perhaps we should change cpuidle_register() accordingly instead. In either case, let's consolidate the code, by converting to use cpuidle_register|unregister(), which also avoids the unnecessary allocation of the struct cpuidle_device. Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org> Reviewed-by: Lorenzo Pieralisi <lorenzo.pieralisi@arm.com> Reviewed-by: Daniel Lezcano <daniel.lezcano@linaro.org> Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com> --- drivers/cpuidle/cpuidle-arm.c | 30 +++--------------------------- 1 file changed, 3 insertions(+), 27 deletions(-) diff --git a/drivers/cpuidle/cpuidle-arm.c b/drivers/cpuidle/cpuidle-arm.c index df564d7832161..3a407a3ef22b4 100644 --- a/drivers/cpuidle/cpuidle-arm.c +++ b/drivers/cpuidle/cpuidle-arm.c @@ -82,7 +82,6 @@ static int __init arm_idle_init_cpu(int cpu) { int ret; struct cpuidle_driver *drv; - struct cpuidle_device *dev; drv = kmemdup(&arm_idle_driver, sizeof(*drv), GFP_KERNEL); if (!drv) @@ -119,33 +118,12 @@ static int __init arm_idle_init_cpu(int cpu) goto out_kfree_drv; } - ret = cpuidle_register_driver(drv); - if (ret) { - if (ret != -EBUSY) - pr_err("Failed to register cpuidle driver\n"); + ret = cpuidle_register(drv, NULL); + if (ret) goto out_kfree_drv; - } - - dev = kzalloc(sizeof(*dev), GFP_KERNEL); - if (!dev) { - ret = -ENOMEM; - goto out_unregister_drv; - } - dev->cpu = cpu; - - ret = cpuidle_register_device(dev); - if (ret) { - pr_err("Failed to register cpuidle device for CPU %d\n", - cpu); - goto out_kfree_dev; - } return 0; -out_kfree_dev: - kfree(dev); -out_unregister_drv: - cpuidle_unregister_driver(drv); out_kfree_drv: kfree(drv); return ret; @@ -176,9 +154,7 @@ static int __init arm_idle_init(void) while (--cpu >= 0) { dev = per_cpu(cpuidle_devices, cpu); drv = cpuidle_get_cpu_driver(dev); - cpuidle_unregister_device(dev); - cpuidle_unregister_driver(drv); - kfree(dev); + cpuidle_unregister(drv); kfree(drv); } From 24cc61d8cb5a9232fadf21a830061853c1268fdd Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel <ard.biesheuvel@linaro.org> Date: Wed, 7 Nov 2018 15:16:06 +0100 Subject: [PATCH 287/443] arm64: memblock: don't permit memblock resizing until linear mapping is up Bhupesh reports that having numerous memblock reservations at early boot may result in the following crash: Unable to handle kernel paging request at virtual address ffff80003ffe0000 ... Call trace: __memcpy+0x110/0x180 memblock_add_range+0x134/0x2e8 memblock_reserve+0x70/0xb8 memblock_alloc_base_nid+0x6c/0x88 __memblock_alloc_base+0x3c/0x4c memblock_alloc_base+0x28/0x4c memblock_alloc+0x2c/0x38 early_pgtable_alloc+0x20/0xb0 paging_init+0x28/0x7f8 This is caused by the fact that we permit memblock resizing before the linear mapping is up, and so the memblock_reserved() array is moved into memory that is not mapped yet. So let's ensure that this crash can no longer occur, by deferring to call to memblock_allow_resize() to after the linear mapping has been created. Reported-by: Bhupesh Sharma <bhsharma@redhat.com> Acked-by: Will Deacon <will.deacon@arm.com> Tested-by: Marc Zyngier <marc.zyngier@arm.com> Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org> Signed-off-by: Catalin Marinas <catalin.marinas@arm.com> --- arch/arm64/mm/init.c | 2 -- arch/arm64/mm/mmu.c | 2 ++ 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c index 9d9582cac6c40..9b432d9fcada8 100644 --- a/arch/arm64/mm/init.c +++ b/arch/arm64/mm/init.c @@ -483,8 +483,6 @@ void __init arm64_memblock_init(void) high_memory = __va(memblock_end_of_DRAM() - 1) + 1; dma_contiguous_reserve(arm64_dma_phys_limit); - - memblock_allow_resize(); } void __init bootmem_init(void) diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c index 394b8d554def4..d1d6601b385d9 100644 --- a/arch/arm64/mm/mmu.c +++ b/arch/arm64/mm/mmu.c @@ -659,6 +659,8 @@ void __init paging_init(void) memblock_free(__pa_symbol(init_pg_dir), __pa_symbol(init_pg_end) - __pa_symbol(init_pg_dir)); + + memblock_allow_resize(); } /* From e2576c8bdfd462c34b8a46c0084e7c30b0851bf4 Mon Sep 17 00:00:00 2001 From: Christian Hewitt <christianshewitt@gmail.com> Date: Tue, 6 Nov 2018 00:08:20 +0100 Subject: [PATCH 288/443] clk: meson-gxbb: set fclk_div3 as CLK_IS_CRITICAL On the Khadas VIM2 (GXM) and LePotato (GXL) board there are problems with reboot; e.g. a ~60 second delay between issuing reboot and the board power cycling (and in some OS configurations reboot will fail and require manual power cycling). Similar to 'commit c987ac6f1f088663b6dad39281071aeb31d450a8 ("clk: meson-gxbb: set fclk_div2 as CLK_IS_CRITICAL")' the SCPI Cortex-M4 Co-Processor seems to depend on FCLK_DIV3 being operational. Until commit 05f814402d6174369b3b29832cbb5eb5ed287059 ("clk: meson: add fdiv clock gates"), this clock was modeled and left on by the bootloader. We don't have precise documentation about the SCPI Co-Processor and its clock requirement so we are learning things the hard way. Marking this clock as critical solves the problem but it should not be viewed as final solution. Ideally, the SCPI driver should claim these clocks. We also depends on some clock hand-off mechanism making its way to CCF, to make sure the clock stays on between its registration and the SCPI driver probe. Fixes: 05f814402d61 ("clk: meson: add fdiv clock gates") Signed-off-by: Christian Hewitt <christianshewitt@gmail.com> Signed-off-by: Jerome Brunet <jbrunet@baylibre.com> Signed-off-by: Stephen Boyd <sboyd@kernel.org> --- drivers/clk/meson/gxbb.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/drivers/clk/meson/gxbb.c b/drivers/clk/meson/gxbb.c index 9309cfaaa464e..4ada9668fd49c 100644 --- a/drivers/clk/meson/gxbb.c +++ b/drivers/clk/meson/gxbb.c @@ -506,6 +506,18 @@ static struct clk_regmap gxbb_fclk_div3 = { .ops = &clk_regmap_gate_ops, .parent_names = (const char *[]){ "fclk_div3_div" }, .num_parents = 1, + /* + * FIXME: + * This clock, as fdiv2, is used by the SCPI FW and is required + * by the platform to operate correctly. + * Until the following condition are met, we need this clock to + * be marked as critical: + * a) The SCPI generic driver claims and enable all the clocks + * it needs + * b) CCF has a clock hand-off mechanism to make the sure the + * clock stays on until the proper driver comes along + */ + .flags = CLK_IS_CRITICAL, }, }; From d6ee1e7e9004d3d246cdfa14196989e0a9466c16 Mon Sep 17 00:00:00 2001 From: Jerome Brunet <jbrunet@baylibre.com> Date: Thu, 8 Nov 2018 10:31:23 +0100 Subject: [PATCH 289/443] clk: meson: axg: mark fdiv2 and fdiv3 as critical Similar to gxbb and gxl platforms, axg SCPI Cortex-M co-processor uses the fdiv2 and fdiv3 to, among other things, provide the cpu clock. Until clock hand-off mechanism makes its way to CCF and the generic SCPI claims platform specific clocks, these clocks must be marked as critical to make sure they are never disabled when needed by the co-processor. Fixes: 05f814402d61 ("clk: meson: add fdiv clock gates") Signed-off-by: Jerome Brunet <jbrunet@baylibre.com> Acked-by: Neil Armstrong <narmstrong@baylibre.com> Signed-off-by: Stephen Boyd <sboyd@kernel.org> --- drivers/clk/meson/axg.c | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/drivers/clk/meson/axg.c b/drivers/clk/meson/axg.c index c981159b02c0f..792735d7e46ea 100644 --- a/drivers/clk/meson/axg.c +++ b/drivers/clk/meson/axg.c @@ -325,6 +325,7 @@ static struct clk_regmap axg_fclk_div2 = { .ops = &clk_regmap_gate_ops, .parent_names = (const char *[]){ "fclk_div2_div" }, .num_parents = 1, + .flags = CLK_IS_CRITICAL, }, }; @@ -349,6 +350,18 @@ static struct clk_regmap axg_fclk_div3 = { .ops = &clk_regmap_gate_ops, .parent_names = (const char *[]){ "fclk_div3_div" }, .num_parents = 1, + /* + * FIXME: + * This clock, as fdiv2, is used by the SCPI FW and is required + * by the platform to operate correctly. + * Until the following condition are met, we need this clock to + * be marked as critical: + * a) The SCPI generic driver claims and enable all the clocks + * it needs + * b) CCF has a clock hand-off mechanism to make the sure the + * clock stays on until the proper driver comes along + */ + .flags = CLK_IS_CRITICAL, }, }; From 6778be4e520959659b27a441c06a84c9cb009085 Mon Sep 17 00:00:00 2001 From: Robin Murphy <robin.murphy@arm.com> Date: Wed, 7 Nov 2018 16:30:32 +0000 Subject: [PATCH 290/443] of/device: Really only set bus DMA mask when appropriate of_dma_configure() was *supposed* to be following the same logic as acpi_dma_configure() and only setting bus_dma_mask if some range was specified by the firmware. However, it seems that subtlety got lost in the process of fitting it into the differently-shaped control flow, and as a result the force_dma==true case ends up always setting the bus mask to the 32-bit default, which is not what anyone wants. Make sure we only touch it if the DT actually said so. Fixes: 6c2fb2ea7636 ("of/device: Set bus DMA mask as appropriate") Reported-by: Aaro Koskinen <aaro.koskinen@iki.fi> Reported-by: Jean-Philippe Brucker <jean-philippe.brucker@arm.com> Tested-by: Aaro Koskinen <aaro.koskinen@iki.fi> Tested-by: John Stultz <john.stultz@linaro.org> Tested-by: Geert Uytterhoeven <geert+renesas@glider.be> Tested-by: Robert Richter <robert.richter@cavium.com> Signed-off-by: Robin Murphy <robin.murphy@arm.com> Signed-off-by: Rob Herring <robh@kernel.org> --- drivers/of/device.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/of/device.c b/drivers/of/device.c index 84e810421418b..258742830e366 100644 --- a/drivers/of/device.c +++ b/drivers/of/device.c @@ -149,9 +149,11 @@ int of_dma_configure(struct device *dev, struct device_node *np, bool force_dma) * set by the driver. */ mask = DMA_BIT_MASK(ilog2(dma_addr + size - 1) + 1); - dev->bus_dma_mask = mask; dev->coherent_dma_mask &= mask; *dev->dma_mask &= mask; + /* ...but only set bus mask if we found valid dma-ranges earlier */ + if (!ret) + dev->bus_dma_mask = mask; coherent = of_dma_is_coherent(np); dev_dbg(dev, "device is%sdma coherent\n", From 89c38422e072bb453e3045b8f1b962a344c3edea Mon Sep 17 00:00:00 2001 From: John Garry <john.garry@huawei.com> Date: Thu, 8 Nov 2018 18:17:03 +0800 Subject: [PATCH 291/443] of, numa: Validate some distance map rules Currently the NUMA distance map parsing does not validate the distance table for the distance-matrix rules 1-2 in [1]. However the arch NUMA code may enforce some of these rules, but not all. Such is the case for the arm64 port, which does not enforce the rule that the distance between separates nodes cannot equal LOCAL_DISTANCE. The patch adds the following rules validation: - distance of node to self equals LOCAL_DISTANCE - distance of separate nodes > LOCAL_DISTANCE This change avoids a yet-unresolved crash reported in [2]. A note on dealing with symmetrical distances between nodes: Validating symmetrical distances between nodes is difficult. If it were mandated in the bindings that every distance must be recorded in the table, then it would be easy. However, it isn't. In addition to this, it is also possible to record [b, a] distance only (and not [a, b]). So, when processing the table for [b, a], we cannot assert that current distance of [a, b] != [b, a] as invalid, as [a, b] distance may not be present in the table and current distance would be default at REMOTE_DISTANCE. As such, we maintain the policy that we overwrite distance [a, b] = [b, a] for b > a. This policy is different to kernel ACPI SLIT validation, which allows non-symmetrical distances (ACPI spec SLIT rules allow it). However, the distance debug message is dropped as it may be misleading (for a distance which is later overwritten). Some final notes on semantics: - It is implied that it is the responsibility of the arch NUMA code to reset the NUMA distance map for an error in distance map parsing. - It is the responsibility of the FW NUMA topology parsing (whether OF or ACPI) to enforce NUMA distance rules, and not arch NUMA code. [1] Documents/devicetree/bindings/numa.txt [2] https://www.spinics.net/lists/arm-kernel/msg683304.html Cc: stable@vger.kernel.org # 4.7 Signed-off-by: John Garry <john.garry@huawei.com> Acked-by: Will Deacon <will.deacon@arm.com> Signed-off-by: Rob Herring <robh@kernel.org> --- drivers/of/of_numa.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/drivers/of/of_numa.c b/drivers/of/of_numa.c index 35c64a4295e07..fe6b13608e510 100644 --- a/drivers/of/of_numa.c +++ b/drivers/of/of_numa.c @@ -104,9 +104,14 @@ static int __init of_numa_parse_distance_map_v1(struct device_node *map) distance = of_read_number(matrix, 1); matrix++; + if ((nodea == nodeb && distance != LOCAL_DISTANCE) || + (nodea != nodeb && distance <= LOCAL_DISTANCE)) { + pr_err("Invalid distance[node%d -> node%d] = %d\n", + nodea, nodeb, distance); + return -EINVAL; + } + numa_set_distance(nodea, nodeb, distance); - pr_debug("distance[node%d -> node%d] = %d\n", - nodea, nodeb, distance); /* Set default distance of node B->A same as A->B */ if (nodeb > nodea) From e12c225258f2584906765234ca6db4ad4c618192 Mon Sep 17 00:00:00 2001 From: Huazhong Tan <tanhuazhong@huawei.com> Date: Thu, 8 Nov 2018 10:13:19 +0800 Subject: [PATCH 292/443] net: hns3: bugfix for not checking return value hns3_reset_notify_init_enet() only return error early if the return value of hns3_restore_vlan() is not 0. This patch adds checking for the return value of hns3_restore_vlan. Fixes: 7fa6be4fd2f6 ("net: hns3: fix incorrect return value/type of some functions") Signed-off-by: Huazhong Tan <tanhuazhong@huawei.com> Signed-off-by: David S. Miller <davem@davemloft.net> --- drivers/net/ethernet/hisilicon/hns3/hns3_enet.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c index 3f96aa30068ec..20fcf0d1c2ce5 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c @@ -3760,7 +3760,8 @@ static int hns3_reset_notify_init_enet(struct hnae3_handle *handle) /* Hardware table is only clear when pf resets */ if (!(handle->flags & HNAE3_SUPPORT_VF)) { ret = hns3_restore_vlan(netdev); - return ret; + if (ret) + return ret; } ret = hns3_restore_fd_rules(netdev); From 0d5b9311baf27bb545f187f12ecfd558220c607d Mon Sep 17 00:00:00 2001 From: Eric Dumazet <edumazet@google.com> Date: Thu, 8 Nov 2018 17:34:27 -0800 Subject: [PATCH 293/443] inet: frags: better deal with smp races MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Multiple cpus might attempt to insert a new fragment in rhashtable, if for example RPS is buggy, as reported by 배석진 in https://patchwork.ozlabs.org/patch/994601/ We use rhashtable_lookup_get_insert_key() instead of rhashtable_insert_fast() to let cpus losing the race free their own inet_frag_queue and use the one that was inserted by another cpu. Fixes: 648700f76b03 ("inet: frags: use rhashtables for reassembly units") Signed-off-by: Eric Dumazet <edumazet@google.com> Reported-by: 배석진 <soukjin.bae@samsung.com> Signed-off-by: David S. Miller <davem@davemloft.net> --- net/ipv4/inet_fragment.c | 29 +++++++++++++++-------------- 1 file changed, 15 insertions(+), 14 deletions(-) diff --git a/net/ipv4/inet_fragment.c b/net/ipv4/inet_fragment.c index bcb11f3a27c0c..760a9e52e02b9 100644 --- a/net/ipv4/inet_fragment.c +++ b/net/ipv4/inet_fragment.c @@ -178,21 +178,22 @@ static struct inet_frag_queue *inet_frag_alloc(struct netns_frags *nf, } static struct inet_frag_queue *inet_frag_create(struct netns_frags *nf, - void *arg) + void *arg, + struct inet_frag_queue **prev) { struct inet_frags *f = nf->f; struct inet_frag_queue *q; - int err; q = inet_frag_alloc(nf, f, arg); - if (!q) + if (!q) { + *prev = ERR_PTR(-ENOMEM); return NULL; - + } mod_timer(&q->timer, jiffies + nf->timeout); - err = rhashtable_insert_fast(&nf->rhashtable, &q->node, - f->rhash_params); - if (err < 0) { + *prev = rhashtable_lookup_get_insert_key(&nf->rhashtable, &q->key, + &q->node, f->rhash_params); + if (*prev) { q->flags |= INET_FRAG_COMPLETE; inet_frag_kill(q); inet_frag_destroy(q); @@ -204,22 +205,22 @@ static struct inet_frag_queue *inet_frag_create(struct netns_frags *nf, /* TODO : call from rcu_read_lock() and no longer use refcount_inc_not_zero() */ struct inet_frag_queue *inet_frag_find(struct netns_frags *nf, void *key) { - struct inet_frag_queue *fq; + struct inet_frag_queue *fq = NULL, *prev; if (!nf->high_thresh || frag_mem_limit(nf) > nf->high_thresh) return NULL; rcu_read_lock(); - fq = rhashtable_lookup(&nf->rhashtable, key, nf->f->rhash_params); - if (fq) { + prev = rhashtable_lookup(&nf->rhashtable, key, nf->f->rhash_params); + if (!prev) + fq = inet_frag_create(nf, key, &prev); + if (prev && !IS_ERR(prev)) { + fq = prev; if (!refcount_inc_not_zero(&fq->refcnt)) fq = NULL; - rcu_read_unlock(); - return fq; } rcu_read_unlock(); - - return inet_frag_create(nf, key); + return fq; } EXPORT_SYMBOL(inet_frag_find); From 39477551df940ddb1339203817de04f5caaacf7a Mon Sep 17 00:00:00 2001 From: Denis Bolotin <denis.bolotin@cavium.com> Date: Thu, 8 Nov 2018 16:46:08 +0200 Subject: [PATCH 294/443] qed: Fix memory/entry leak in qed_init_sp_request() Free the allocated SPQ entry or return the acquired SPQ entry to the free list in error flows. Signed-off-by: Denis Bolotin <denis.bolotin@cavium.com> Signed-off-by: Michal Kalderon <michal.kalderon@cavium.com> Signed-off-by: David S. Miller <davem@davemloft.net> --- .../net/ethernet/qlogic/qed/qed_sp_commands.c | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/qlogic/qed/qed_sp_commands.c b/drivers/net/ethernet/qlogic/qed/qed_sp_commands.c index 77b6248ad3b97..e86a1ea236133 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_sp_commands.c +++ b/drivers/net/ethernet/qlogic/qed/qed_sp_commands.c @@ -80,7 +80,7 @@ int qed_sp_init_request(struct qed_hwfn *p_hwfn, case QED_SPQ_MODE_BLOCK: if (!p_data->p_comp_data) - return -EINVAL; + goto err; p_ent->comp_cb.cookie = p_data->p_comp_data->cookie; break; @@ -95,7 +95,7 @@ int qed_sp_init_request(struct qed_hwfn *p_hwfn, default: DP_NOTICE(p_hwfn, "Unknown SPQE completion mode %d\n", p_ent->comp_mode); - return -EINVAL; + goto err; } DP_VERBOSE(p_hwfn, QED_MSG_SPQ, @@ -109,6 +109,18 @@ int qed_sp_init_request(struct qed_hwfn *p_hwfn, memset(&p_ent->ramrod, 0, sizeof(p_ent->ramrod)); return 0; + +err: + /* qed_spq_get_entry() can either get an entry from the free_pool, + * or, if no entries are left, allocate a new entry and add it to + * the unlimited_pending list. + */ + if (p_ent->queue == &p_hwfn->p_spq->unlimited_pending) + kfree(p_ent); + else + qed_spq_return_entry(p_hwfn, p_ent); + + return -EINVAL; } static enum tunnel_clss qed_tunn_clss_to_fw_clss(u8 type) From 2632f22ebd08da249c2017962a199a0cfb2324bf Mon Sep 17 00:00:00 2001 From: Denis Bolotin <denis.bolotin@cavium.com> Date: Thu, 8 Nov 2018 16:46:09 +0200 Subject: [PATCH 295/443] qed: Fix blocking/unlimited SPQ entries leak When there are no SPQ entries left in the free_pool, new entries are allocated and are added to the unlimited list. When an entry in the pool is available, the content is copied from the original entry, and the new entry is sent to the device. qed_spq_post() is not aware of that, so the additional entry is stored in the original entry as p_post_ent, which can later be returned to the pool. Signed-off-by: Denis Bolotin <denis.bolotin@cavium.com> Signed-off-by: Michal Kalderon <michal.kalderon@cavium.com> Signed-off-by: David S. Miller <davem@davemloft.net> --- drivers/net/ethernet/qlogic/qed/qed_sp.h | 3 ++ drivers/net/ethernet/qlogic/qed/qed_spq.c | 57 ++++++++++++----------- 2 files changed, 33 insertions(+), 27 deletions(-) diff --git a/drivers/net/ethernet/qlogic/qed/qed_sp.h b/drivers/net/ethernet/qlogic/qed/qed_sp.h index e95431f6acd46..04259df8a5c24 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_sp.h +++ b/drivers/net/ethernet/qlogic/qed/qed_sp.h @@ -167,6 +167,9 @@ struct qed_spq_entry { enum spq_mode comp_mode; struct qed_spq_comp_cb comp_cb; struct qed_spq_comp_done comp_done; /* SPQ_MODE_EBLOCK */ + + /* Posted entry for unlimited list entry in EBLOCK mode */ + struct qed_spq_entry *post_ent; }; struct qed_eq { diff --git a/drivers/net/ethernet/qlogic/qed/qed_spq.c b/drivers/net/ethernet/qlogic/qed/qed_spq.c index c4a6274dd625c..c1a81ec0524b8 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_spq.c +++ b/drivers/net/ethernet/qlogic/qed/qed_spq.c @@ -685,6 +685,8 @@ static int qed_spq_add_entry(struct qed_hwfn *p_hwfn, /* EBLOCK responsible to free the allocated p_ent */ if (p_ent->comp_mode != QED_SPQ_MODE_EBLOCK) kfree(p_ent); + else + p_ent->post_ent = p_en2; p_ent = p_en2; } @@ -767,6 +769,25 @@ static int qed_spq_pend_post(struct qed_hwfn *p_hwfn) SPQ_HIGH_PRI_RESERVE_DEFAULT); } +/* Avoid overriding of SPQ entries when getting out-of-order completions, by + * marking the completions in a bitmap and increasing the chain consumer only + * for the first successive completed entries. + */ +static void qed_spq_comp_bmap_update(struct qed_hwfn *p_hwfn, __le16 echo) +{ + u16 pos = le16_to_cpu(echo) % SPQ_RING_SIZE; + struct qed_spq *p_spq = p_hwfn->p_spq; + + __set_bit(pos, p_spq->p_comp_bitmap); + while (test_bit(p_spq->comp_bitmap_idx, + p_spq->p_comp_bitmap)) { + __clear_bit(p_spq->comp_bitmap_idx, + p_spq->p_comp_bitmap); + p_spq->comp_bitmap_idx++; + qed_chain_return_produced(&p_spq->chain); + } +} + int qed_spq_post(struct qed_hwfn *p_hwfn, struct qed_spq_entry *p_ent, u8 *fw_return_code) { @@ -824,11 +845,12 @@ int qed_spq_post(struct qed_hwfn *p_hwfn, p_ent->queue == &p_spq->unlimited_pending); if (p_ent->queue == &p_spq->unlimited_pending) { - /* This is an allocated p_ent which does not need to - * return to pool. - */ + struct qed_spq_entry *p_post_ent = p_ent->post_ent; + kfree(p_ent); - return rc; + + /* Return the entry which was actually posted */ + p_ent = p_post_ent; } if (rc) @@ -842,7 +864,7 @@ int qed_spq_post(struct qed_hwfn *p_hwfn, spq_post_fail2: spin_lock_bh(&p_spq->lock); list_del(&p_ent->list); - qed_chain_return_produced(&p_spq->chain); + qed_spq_comp_bmap_update(p_hwfn, p_ent->elem.hdr.echo); spq_post_fail: /* return to the free pool */ @@ -874,25 +896,8 @@ int qed_spq_completion(struct qed_hwfn *p_hwfn, spin_lock_bh(&p_spq->lock); list_for_each_entry_safe(p_ent, tmp, &p_spq->completion_pending, list) { if (p_ent->elem.hdr.echo == echo) { - u16 pos = le16_to_cpu(echo) % SPQ_RING_SIZE; - list_del(&p_ent->list); - - /* Avoid overriding of SPQ entries when getting - * out-of-order completions, by marking the completions - * in a bitmap and increasing the chain consumer only - * for the first successive completed entries. - */ - __set_bit(pos, p_spq->p_comp_bitmap); - - while (test_bit(p_spq->comp_bitmap_idx, - p_spq->p_comp_bitmap)) { - __clear_bit(p_spq->comp_bitmap_idx, - p_spq->p_comp_bitmap); - p_spq->comp_bitmap_idx++; - qed_chain_return_produced(&p_spq->chain); - } - + qed_spq_comp_bmap_update(p_hwfn, echo); p_spq->comp_count++; found = p_ent; break; @@ -931,11 +936,9 @@ int qed_spq_completion(struct qed_hwfn *p_hwfn, QED_MSG_SPQ, "Got a completion without a callback function\n"); - if ((found->comp_mode != QED_SPQ_MODE_EBLOCK) || - (found->queue == &p_spq->unlimited_pending)) + if (found->comp_mode != QED_SPQ_MODE_EBLOCK) /* EBLOCK is responsible for returning its own entry into the - * free list, unless it originally added the entry into the - * unlimited pending list. + * free list. */ qed_spq_return_entry(p_hwfn, found); From fb5e7438e7a3c8966e04ccb0760170e9e06f3699 Mon Sep 17 00:00:00 2001 From: Denis Bolotin <denis.bolotin@cavium.com> Date: Thu, 8 Nov 2018 16:46:10 +0200 Subject: [PATCH 296/443] qed: Fix SPQ entries not returned to pool in error flows qed_sp_destroy_request() API was added for SPQ users that need to free/return the entry they acquired in their error flows. Signed-off-by: Denis Bolotin <denis.bolotin@cavium.com> Signed-off-by: Michal Kalderon <michal.kalderon@cavium.com> Signed-off-by: David S. Miller <davem@davemloft.net> --- drivers/net/ethernet/qlogic/qed/qed_fcoe.c | 11 +++++++--- drivers/net/ethernet/qlogic/qed/qed_iscsi.c | 1 + drivers/net/ethernet/qlogic/qed/qed_l2.c | 12 ++++++---- drivers/net/ethernet/qlogic/qed/qed_rdma.c | 1 + drivers/net/ethernet/qlogic/qed/qed_roce.c | 1 + drivers/net/ethernet/qlogic/qed/qed_sp.h | 11 ++++++++++ .../net/ethernet/qlogic/qed/qed_sp_commands.c | 22 ++++++++++++------- drivers/net/ethernet/qlogic/qed/qed_sriov.c | 1 + 8 files changed, 45 insertions(+), 15 deletions(-) diff --git a/drivers/net/ethernet/qlogic/qed/qed_fcoe.c b/drivers/net/ethernet/qlogic/qed/qed_fcoe.c index cc1b373c0ace5..46dc93d3b9b53 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_fcoe.c +++ b/drivers/net/ethernet/qlogic/qed/qed_fcoe.c @@ -147,7 +147,8 @@ qed_sp_fcoe_func_start(struct qed_hwfn *p_hwfn, "Cannot satisfy CQ amount. CQs requested %d, CQs available %d. Aborting function start\n", fcoe_pf_params->num_cqs, p_hwfn->hw_info.feat_num[QED_FCOE_CQ]); - return -EINVAL; + rc = -EINVAL; + goto err; } p_data->mtu = cpu_to_le16(fcoe_pf_params->mtu); @@ -156,14 +157,14 @@ qed_sp_fcoe_func_start(struct qed_hwfn *p_hwfn, rc = qed_cxt_acquire_cid(p_hwfn, PROTOCOLID_FCOE, &dummy_cid); if (rc) - return rc; + goto err; cxt_info.iid = dummy_cid; rc = qed_cxt_get_cid_info(p_hwfn, &cxt_info); if (rc) { DP_NOTICE(p_hwfn, "Cannot find context info for dummy cid=%d\n", dummy_cid); - return rc; + goto err; } p_cxt = cxt_info.p_cxt; SET_FIELD(p_cxt->tstorm_ag_context.flags3, @@ -240,6 +241,10 @@ qed_sp_fcoe_func_start(struct qed_hwfn *p_hwfn, rc = qed_spq_post(p_hwfn, p_ent, NULL); return rc; + +err: + qed_sp_destroy_request(p_hwfn, p_ent); + return rc; } static int diff --git a/drivers/net/ethernet/qlogic/qed/qed_iscsi.c b/drivers/net/ethernet/qlogic/qed/qed_iscsi.c index 1135387bd99d7..4f8a685d1a55f 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_iscsi.c +++ b/drivers/net/ethernet/qlogic/qed/qed_iscsi.c @@ -200,6 +200,7 @@ qed_sp_iscsi_func_start(struct qed_hwfn *p_hwfn, "Cannot satisfy CQ amount. Queues requested %d, CQs available %d. Aborting function start\n", p_params->num_queues, p_hwfn->hw_info.feat_num[QED_ISCSI_CQ]); + qed_sp_destroy_request(p_hwfn, p_ent); return -EINVAL; } diff --git a/drivers/net/ethernet/qlogic/qed/qed_l2.c b/drivers/net/ethernet/qlogic/qed/qed_l2.c index 82a1bd1f8a8ce..67c02ea939062 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_l2.c +++ b/drivers/net/ethernet/qlogic/qed/qed_l2.c @@ -740,8 +740,7 @@ int qed_sp_vport_update(struct qed_hwfn *p_hwfn, rc = qed_sp_vport_update_rss(p_hwfn, p_ramrod, p_rss_params); if (rc) { - /* Return spq entry which is taken in qed_sp_init_request()*/ - qed_spq_return_entry(p_hwfn, p_ent); + qed_sp_destroy_request(p_hwfn, p_ent); return rc; } @@ -1355,6 +1354,7 @@ qed_filter_ucast_common(struct qed_hwfn *p_hwfn, DP_NOTICE(p_hwfn, "%d is not supported yet\n", p_filter_cmd->opcode); + qed_sp_destroy_request(p_hwfn, *pp_ent); return -EINVAL; } @@ -2056,13 +2056,13 @@ qed_configure_rfs_ntuple_filter(struct qed_hwfn *p_hwfn, } else { rc = qed_fw_vport(p_hwfn, p_params->vport_id, &abs_vport_id); if (rc) - return rc; + goto err; if (p_params->qid != QED_RFS_NTUPLE_QID_RSS) { rc = qed_fw_l2_queue(p_hwfn, p_params->qid, &abs_rx_q_id); if (rc) - return rc; + goto err; p_ramrod->rx_qid_valid = 1; p_ramrod->rx_qid = cpu_to_le16(abs_rx_q_id); @@ -2083,6 +2083,10 @@ qed_configure_rfs_ntuple_filter(struct qed_hwfn *p_hwfn, (u64)p_params->addr, p_params->length); return qed_spq_post(p_hwfn, p_ent, NULL); + +err: + qed_sp_destroy_request(p_hwfn, p_ent); + return rc; } int qed_get_rxq_coalesce(struct qed_hwfn *p_hwfn, diff --git a/drivers/net/ethernet/qlogic/qed/qed_rdma.c b/drivers/net/ethernet/qlogic/qed/qed_rdma.c index c71391b9c757a..62113438c8809 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_rdma.c +++ b/drivers/net/ethernet/qlogic/qed/qed_rdma.c @@ -1514,6 +1514,7 @@ qed_rdma_register_tid(void *rdma_cxt, default: rc = -EINVAL; DP_VERBOSE(p_hwfn, QED_MSG_RDMA, "rc = %d\n", rc); + qed_sp_destroy_request(p_hwfn, p_ent); return rc; } SET_FIELD(p_ramrod->flags1, diff --git a/drivers/net/ethernet/qlogic/qed/qed_roce.c b/drivers/net/ethernet/qlogic/qed/qed_roce.c index f9167d1354bbe..e49fada854108 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_roce.c +++ b/drivers/net/ethernet/qlogic/qed/qed_roce.c @@ -745,6 +745,7 @@ static int qed_roce_sp_destroy_qp_responder(struct qed_hwfn *p_hwfn, DP_NOTICE(p_hwfn, "qed destroy responder failed: cannot allocate memory (ramrod). rc = %d\n", rc); + qed_sp_destroy_request(p_hwfn, p_ent); return rc; } diff --git a/drivers/net/ethernet/qlogic/qed/qed_sp.h b/drivers/net/ethernet/qlogic/qed/qed_sp.h index 04259df8a5c24..3157c0d994417 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_sp.h +++ b/drivers/net/ethernet/qlogic/qed/qed_sp.h @@ -399,6 +399,17 @@ struct qed_sp_init_data { struct qed_spq_comp_cb *p_comp_data; }; +/** + * @brief Returns a SPQ entry to the pool / frees the entry if allocated. + * Should be called on in error flows after initializing the SPQ entry + * and before posting it. + * + * @param p_hwfn + * @param p_ent + */ +void qed_sp_destroy_request(struct qed_hwfn *p_hwfn, + struct qed_spq_entry *p_ent); + int qed_sp_init_request(struct qed_hwfn *p_hwfn, struct qed_spq_entry **pp_ent, u8 cmd, diff --git a/drivers/net/ethernet/qlogic/qed/qed_sp_commands.c b/drivers/net/ethernet/qlogic/qed/qed_sp_commands.c index e86a1ea236133..888274fa208bc 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_sp_commands.c +++ b/drivers/net/ethernet/qlogic/qed/qed_sp_commands.c @@ -47,6 +47,19 @@ #include "qed_sp.h" #include "qed_sriov.h" +void qed_sp_destroy_request(struct qed_hwfn *p_hwfn, + struct qed_spq_entry *p_ent) +{ + /* qed_spq_get_entry() can either get an entry from the free_pool, + * or, if no entries are left, allocate a new entry and add it to + * the unlimited_pending list. + */ + if (p_ent->queue == &p_hwfn->p_spq->unlimited_pending) + kfree(p_ent); + else + qed_spq_return_entry(p_hwfn, p_ent); +} + int qed_sp_init_request(struct qed_hwfn *p_hwfn, struct qed_spq_entry **pp_ent, u8 cmd, u8 protocol, struct qed_sp_init_data *p_data) @@ -111,14 +124,7 @@ int qed_sp_init_request(struct qed_hwfn *p_hwfn, return 0; err: - /* qed_spq_get_entry() can either get an entry from the free_pool, - * or, if no entries are left, allocate a new entry and add it to - * the unlimited_pending list. - */ - if (p_ent->queue == &p_hwfn->p_spq->unlimited_pending) - kfree(p_ent); - else - qed_spq_return_entry(p_hwfn, p_ent); + qed_sp_destroy_request(p_hwfn, p_ent); return -EINVAL; } diff --git a/drivers/net/ethernet/qlogic/qed/qed_sriov.c b/drivers/net/ethernet/qlogic/qed/qed_sriov.c index 9b08a9d9e1513..ca6290fa0f309 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_sriov.c +++ b/drivers/net/ethernet/qlogic/qed/qed_sriov.c @@ -101,6 +101,7 @@ static int qed_sp_vf_start(struct qed_hwfn *p_hwfn, struct qed_vf_info *p_vf) default: DP_NOTICE(p_hwfn, "Unknown VF personality %d\n", p_hwfn->hw_info.personality); + qed_sp_destroy_request(p_hwfn, p_ent); return -EINVAL; } From fa5c448d98f0df660bfcad3dd5facc027ef84cd3 Mon Sep 17 00:00:00 2001 From: Sagiv Ozeri <sagiv.ozeri@cavium.com> Date: Thu, 8 Nov 2018 16:46:11 +0200 Subject: [PATCH 297/443] qed: Fix potential memory corruption A stuck ramrod should be deleted from the completion_pending list, otherwise it will be added again in the future and corrupt the list. Return error value to inform that ramrod is stuck and should be deleted. Signed-off-by: Sagiv Ozeri <sagiv.ozeri@cavium.com> Signed-off-by: Denis Bolotin <denis.bolotin@cavium.com> Signed-off-by: David S. Miller <davem@davemloft.net> --- drivers/net/ethernet/qlogic/qed/qed_spq.c | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/drivers/net/ethernet/qlogic/qed/qed_spq.c b/drivers/net/ethernet/qlogic/qed/qed_spq.c index c1a81ec0524b8..0a9c5bb0fa486 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_spq.c +++ b/drivers/net/ethernet/qlogic/qed/qed_spq.c @@ -142,6 +142,7 @@ static int qed_spq_block(struct qed_hwfn *p_hwfn, DP_INFO(p_hwfn, "Ramrod is stuck, requesting MCP drain\n"); rc = qed_mcp_drain(p_hwfn, p_ptt); + qed_ptt_release(p_hwfn, p_ptt); if (rc) { DP_NOTICE(p_hwfn, "MCP drain failed\n"); goto err; @@ -150,18 +151,15 @@ static int qed_spq_block(struct qed_hwfn *p_hwfn, /* Retry after drain */ rc = __qed_spq_block(p_hwfn, p_ent, p_fw_ret, true); if (!rc) - goto out; + return 0; comp_done = (struct qed_spq_comp_done *)p_ent->comp_cb.cookie; - if (comp_done->done == 1) + if (comp_done->done == 1) { if (p_fw_ret) *p_fw_ret = comp_done->fw_return_code; -out: - qed_ptt_release(p_hwfn, p_ptt); - return 0; - + return 0; + } err: - qed_ptt_release(p_hwfn, p_ptt); DP_NOTICE(p_hwfn, "Ramrod is stuck [CID %08x cmd %02x protocol %02x echo %04x]\n", le32_to_cpu(p_ent->elem.hdr.cid), From 8137b6ef0ce469154e5cf19f8e7fe04d9a72ac5e Mon Sep 17 00:00:00 2001 From: Thor Thayer <thor.thayer@linux.intel.com> Date: Thu, 8 Nov 2018 11:42:14 -0600 Subject: [PATCH 298/443] net: stmmac: Fix RX packet size > 8191 Ping problems with packets > 8191 as shown: PING 192.168.1.99 (192.168.1.99) 8150(8178) bytes of data. 8158 bytes from 192.168.1.99: icmp_seq=1 ttl=64 time=0.669 ms wrong data byte 8144 should be 0xd0 but was 0x0 16 10 11 12 13 14 15 16 17 18 19 1a 1b 1c 1d 1e 1f 20 21 22 23 24 25 26 27 28 29 2a 2b 2c 2d 2e 2f %< ---------------snip-------------------------------------- 8112 b0 b1 b2 b3 b4 b5 b6 b7 b8 b9 ba bb bc bd be bf c0 c1 c2 c3 c4 c5 c6 c7 c8 c9 ca cb cc cd ce cf 8144 0 0 0 0 d0 d1 ^^^^^^^ Notice the 4 bytes of 0 before the expected byte of d0. Databook notes that the RX buffer must be a multiple of 4/8/16 bytes [1]. Update the DMA Buffer size define to 8188 instead of 8192. Remove the -1 from the RX buffer size allocations and use the new DMA Buffer size directly. [1] Synopsys DesignWare Cores Ethernet MAC Universal v3.70a [section 8.4.2 - Table 8-24] Tested on SoCFPGA Stratix10 with ping sweep from 100 to 8300 byte packets. Fixes: 286a83721720 ("stmmac: add CHAINED descriptor mode support (V4)") Suggested-by: Jose Abreu <jose.abreu@synopsys.com> Signed-off-by: Thor Thayer <thor.thayer@linux.intel.com> Signed-off-by: David S. Miller <davem@davemloft.net> --- drivers/net/ethernet/stmicro/stmmac/common.h | 3 ++- drivers/net/ethernet/stmicro/stmmac/descs_com.h | 2 +- drivers/net/ethernet/stmicro/stmmac/enh_desc.c | 2 +- drivers/net/ethernet/stmicro/stmmac/ring_mode.c | 2 +- 4 files changed, 5 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/common.h b/drivers/net/ethernet/stmicro/stmmac/common.h index b1b305f8f4143..272b9ca663148 100644 --- a/drivers/net/ethernet/stmicro/stmmac/common.h +++ b/drivers/net/ethernet/stmicro/stmmac/common.h @@ -365,7 +365,8 @@ struct dma_features { /* GMAC TX FIFO is 8K, Rx FIFO is 16K */ #define BUF_SIZE_16KiB 16384 -#define BUF_SIZE_8KiB 8192 +/* RX Buffer size must be < 8191 and multiple of 4/8/16 bytes */ +#define BUF_SIZE_8KiB 8188 #define BUF_SIZE_4KiB 4096 #define BUF_SIZE_2KiB 2048 diff --git a/drivers/net/ethernet/stmicro/stmmac/descs_com.h b/drivers/net/ethernet/stmicro/stmmac/descs_com.h index ca9d7e48034ce..40d6356a7e73c 100644 --- a/drivers/net/ethernet/stmicro/stmmac/descs_com.h +++ b/drivers/net/ethernet/stmicro/stmmac/descs_com.h @@ -31,7 +31,7 @@ /* Enhanced descriptors */ static inline void ehn_desc_rx_set_on_ring(struct dma_desc *p, int end) { - p->des1 |= cpu_to_le32(((BUF_SIZE_8KiB - 1) + p->des1 |= cpu_to_le32((BUF_SIZE_8KiB << ERDES1_BUFFER2_SIZE_SHIFT) & ERDES1_BUFFER2_SIZE_MASK); diff --git a/drivers/net/ethernet/stmicro/stmmac/enh_desc.c b/drivers/net/ethernet/stmicro/stmmac/enh_desc.c index 77914c89d7497..5ef91a790f9d1 100644 --- a/drivers/net/ethernet/stmicro/stmmac/enh_desc.c +++ b/drivers/net/ethernet/stmicro/stmmac/enh_desc.c @@ -262,7 +262,7 @@ static void enh_desc_init_rx_desc(struct dma_desc *p, int disable_rx_ic, int mode, int end) { p->des0 |= cpu_to_le32(RDES0_OWN); - p->des1 |= cpu_to_le32((BUF_SIZE_8KiB - 1) & ERDES1_BUFFER1_SIZE_MASK); + p->des1 |= cpu_to_le32(BUF_SIZE_8KiB & ERDES1_BUFFER1_SIZE_MASK); if (mode == STMMAC_CHAIN_MODE) ehn_desc_rx_set_on_chain(p); diff --git a/drivers/net/ethernet/stmicro/stmmac/ring_mode.c b/drivers/net/ethernet/stmicro/stmmac/ring_mode.c index abc3f85270cd0..d8c5bc4122195 100644 --- a/drivers/net/ethernet/stmicro/stmmac/ring_mode.c +++ b/drivers/net/ethernet/stmicro/stmmac/ring_mode.c @@ -140,7 +140,7 @@ static void clean_desc3(void *priv_ptr, struct dma_desc *p) static int set_16kib_bfsize(int mtu) { int ret = 0; - if (unlikely(mtu >= BUF_SIZE_8KiB)) + if (unlikely(mtu > BUF_SIZE_8KiB)) ret = BUF_SIZE_16KiB; return ret; } From 85b18b0237ce9986a81a1b9534b5e2ee116f5504 Mon Sep 17 00:00:00 2001 From: Stefan Wahren <stefan.wahren@i2se.com> Date: Thu, 8 Nov 2018 20:38:26 +0100 Subject: [PATCH 299/443] net: smsc95xx: Fix MTU range The commit f77f0aee4da4 ("net: use core MTU range checking in USB NIC drivers") introduce a common MTU handling for usbnet. But it's missing the necessary changes for smsc95xx. So set the MTU range accordingly. This patch has been tested on a Raspberry Pi 3. Fixes: f77f0aee4da4 ("net: use core MTU range checking in USB NIC drivers") Signed-off-by: Stefan Wahren <stefan.wahren@i2se.com> Signed-off-by: David S. Miller <davem@davemloft.net> --- drivers/net/usb/smsc95xx.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/usb/smsc95xx.c b/drivers/net/usb/smsc95xx.c index 2d17f3b9bb165..f2d01cb6f958c 100644 --- a/drivers/net/usb/smsc95xx.c +++ b/drivers/net/usb/smsc95xx.c @@ -1321,6 +1321,8 @@ static int smsc95xx_bind(struct usbnet *dev, struct usb_interface *intf) dev->net->ethtool_ops = &smsc95xx_ethtool_ops; dev->net->flags |= IFF_MULTICAST; dev->net->hard_header_len += SMSC95XX_TX_OVERHEAD_CSUM; + dev->net->min_mtu = ETH_MIN_MTU; + dev->net->max_mtu = ETH_DATA_LEN; dev->hard_mtu = dev->net->mtu + dev->net->hard_header_len; pdata->dev = dev; From 1457d8cf7664f34c4ba534c1073821a559a2f6f9 Mon Sep 17 00:00:00 2001 From: Juergen Gross <jgross@suse.com> Date: Wed, 7 Nov 2018 18:01:00 +0100 Subject: [PATCH 300/443] x86/xen: fix pv boot Commit 9da3f2b7405440 ("x86/fault: BUG() when uaccess helpers fault on kernel addresses") introduced a regression for booting Xen PV guests. Xen PV guests are using __put_user() and __get_user() for accessing the p2m map (physical to machine frame number map) as accesses might fail in case of not populated areas of the map. With above commit using __put_user() and __get_user() for accessing kernel pages is no longer valid. So replace the Xen hack by adding appropriate p2m access functions using the default fixup handler. Fixes: 9da3f2b7405440 ("x86/fault: BUG() when uaccess helpers fault on kernel addresses") Signed-off-by: Juergen Gross <jgross@suse.com> Reviewed-by: Andrew Cooper <andrew.cooper3@citrix.com> Signed-off-by: Juergen Gross <jgross@suse.com> --- arch/x86/include/asm/xen/page.h | 35 +++++++++++++++++++++++++++++---- arch/x86/xen/p2m.c | 3 +-- 2 files changed, 32 insertions(+), 6 deletions(-) diff --git a/arch/x86/include/asm/xen/page.h b/arch/x86/include/asm/xen/page.h index 123e669bf363d..790ce08e41f20 100644 --- a/arch/x86/include/asm/xen/page.h +++ b/arch/x86/include/asm/xen/page.h @@ -9,7 +9,7 @@ #include <linux/mm.h> #include <linux/device.h> -#include <linux/uaccess.h> +#include <asm/extable.h> #include <asm/page.h> #include <asm/pgtable.h> @@ -93,12 +93,39 @@ clear_foreign_p2m_mapping(struct gnttab_unmap_grant_ref *unmap_ops, */ static inline int xen_safe_write_ulong(unsigned long *addr, unsigned long val) { - return __put_user(val, (unsigned long __user *)addr); + int ret = 0; + + asm volatile("1: mov %[val], %[ptr]\n" + "2:\n" + ".section .fixup, \"ax\"\n" + "3: sub $1, %[ret]\n" + " jmp 2b\n" + ".previous\n" + _ASM_EXTABLE(1b, 3b) + : [ret] "+r" (ret), [ptr] "=m" (*addr) + : [val] "r" (val)); + + return ret; } -static inline int xen_safe_read_ulong(unsigned long *addr, unsigned long *val) +static inline int xen_safe_read_ulong(const unsigned long *addr, + unsigned long *val) { - return __get_user(*val, (unsigned long __user *)addr); + int ret = 0; + unsigned long rval = ~0ul; + + asm volatile("1: mov %[ptr], %[rval]\n" + "2:\n" + ".section .fixup, \"ax\"\n" + "3: sub $1, %[ret]\n" + " jmp 2b\n" + ".previous\n" + _ASM_EXTABLE(1b, 3b) + : [ret] "+r" (ret), [rval] "+r" (rval) + : [ptr] "m" (*addr)); + *val = rval; + + return ret; } #ifdef CONFIG_XEN_PV diff --git a/arch/x86/xen/p2m.c b/arch/x86/xen/p2m.c index d6d74efd8912a..4fe84436d5a7e 100644 --- a/arch/x86/xen/p2m.c +++ b/arch/x86/xen/p2m.c @@ -656,8 +656,7 @@ bool __set_phys_to_machine(unsigned long pfn, unsigned long mfn) /* * The interface requires atomic updates on p2m elements. - * xen_safe_write_ulong() is using __put_user which does an atomic - * store via asm(). + * xen_safe_write_ulong() is using an atomic store via asm(). */ if (likely(!xen_safe_write_ulong(xen_p2m_addr + pfn, mfn))) return true; From 68a031d22c57b94870ba13513c9d93b8a8119ab2 Mon Sep 17 00:00:00 2001 From: John Garry <john.garry@huawei.com> Date: Mon, 5 Nov 2018 20:35:14 +0800 Subject: [PATCH 301/443] crypto: hisilicon - Fix NULL dereference for same dst and src When the source and destination addresses for the cipher are the same, we will get a NULL dereference from accessing the split destination scatterlist memories, as shown: [ 56.565719] tcrypt: [ 56.565719] testing speed of async ecb(aes) (hisi_sec_aes_ecb) encryption [ 56.574683] tcrypt: test 0 (128 bit key, 16 byte blocks): [ 56.587585] Unable to handle kernel NULL pointer dereference at virtual address 0000000000000000 [ 56.596361] Mem abort info: [ 56.599151] ESR = 0x96000006 [ 56.602196] Exception class = DABT (current EL), IL = 32 bits [ 56.608105] SET = 0, FnV = 0 [ 56.611149] EA = 0, S1PTW = 0 [ 56.614280] Data abort info: [ 56.617151] ISV = 0, ISS = 0x00000006 [ 56.620976] CM = 0, WnR = 0 [ 56.623930] user pgtable: 4k pages, 48-bit VAs, pgdp = (____ptrval____) [ 56.630533] [0000000000000000] pgd=0000041fc7e4d003, pud=0000041fcd9bf003, pmd=0000000000000000 [ 56.639224] Internal error: Oops: 96000006 [#1] PREEMPT SMP [ 56.644782] Modules linked in: tcrypt(+) [ 56.648695] CPU: 21 PID: 2326 Comm: insmod Tainted: G W 4.19.0-rc6-00001-g3fabfb8-dirty #716 [ 56.658420] Hardware name: Huawei Taishan 2280 /D05, BIOS Hisilicon D05 IT17 Nemo 2.0 RC0 10/05/2018 [ 56.667537] pstate: 20000005 (nzCv daif -PAN -UAO) [ 56.672322] pc : sec_alg_skcipher_crypto+0x318/0x748 [ 56.677274] lr : sec_alg_skcipher_crypto+0x178/0x748 [ 56.682224] sp : ffff0000118e3840 [ 56.685525] x29: ffff0000118e3840 x28: ffff841fbb3f8118 [ 56.690825] x27: 0000000000000000 x26: 0000000000000000 [ 56.696125] x25: ffff841fbb3f8080 x24: ffff841fbadc0018 [ 56.701425] x23: ffff000009119000 x22: ffff841fbb24e280 [ 56.706724] x21: ffff841ff212e780 x20: ffff841ff212e700 [ 56.712023] x19: 0000000000000001 x18: ffffffffffffffff [ 56.717322] x17: 0000000000000000 x16: 0000000000000000 [ 56.722621] x15: ffff0000091196c8 x14: 72635f7265687069 [ 56.727920] x13: 636b735f676c615f x12: ffff000009119940 [ 56.733219] x11: 0000000000000000 x10: 00000000006080c0 [ 56.738519] x9 : 0000000000000000 x8 : ffff841fbb24e480 [ 56.743818] x7 : ffff841fbb24e500 x6 : ffff841ff00cdcc0 [ 56.749117] x5 : 0000000000000010 x4 : 0000000000000000 [ 56.754416] x3 : ffff841fbb24e380 x2 : ffff841fbb24e480 [ 56.759715] x1 : 0000000000000000 x0 : ffff000008f682c8 [ 56.765016] Process insmod (pid: 2326, stack limit = 0x(____ptrval____)) [ 56.771702] Call trace: [ 56.774136] sec_alg_skcipher_crypto+0x318/0x748 [ 56.778740] sec_alg_skcipher_encrypt+0x10/0x18 [ 56.783259] test_skcipher_speed+0x2a0/0x700 [tcrypt] [ 56.788298] do_test+0x18f8/0x48c8 [tcrypt] [ 56.792469] tcrypt_mod_init+0x60/0x1000 [tcrypt] [ 56.797161] do_one_initcall+0x5c/0x178 [ 56.800985] do_init_module+0x58/0x1b4 [ 56.804721] load_module+0x1da4/0x2150 [ 56.808456] __se_sys_init_module+0x14c/0x1e8 [ 56.812799] __arm64_sys_init_module+0x18/0x20 [ 56.817231] el0_svc_common+0x60/0xe8 [ 56.820880] el0_svc_handler+0x2c/0x80 [ 56.824615] el0_svc+0x8/0xc [ 56.827483] Code: a94c87a3 910b2000 f87b7842 f9004ba2 (b87b7821) [ 56.833564] ---[ end trace 0f63290590e93d94 ]--- Segmentation fault Fix this by only accessing these memories when we have different src and dst. Fixes: 915e4e8413da ("crypto: hisilicon - SEC security accelerator driver") Reviewed-by: Jonathan Cameron <Jonathan.Cameron@huawei.com> Cc: <stable@vger.kernel.org> Signed-off-by: John Garry <john.garry@huawei.com> Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au> --- drivers/crypto/hisilicon/sec/sec_algs.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/drivers/crypto/hisilicon/sec/sec_algs.c b/drivers/crypto/hisilicon/sec/sec_algs.c index f7d6d690116ee..32c6c02cb9ae0 100644 --- a/drivers/crypto/hisilicon/sec/sec_algs.c +++ b/drivers/crypto/hisilicon/sec/sec_algs.c @@ -732,6 +732,7 @@ static int sec_alg_skcipher_crypto(struct skcipher_request *skreq, int *splits_in_nents; int *splits_out_nents = NULL; struct sec_request_el *el, *temp; + bool split = skreq->src != skreq->dst; mutex_init(&sec_req->lock); sec_req->req_base = &skreq->base; @@ -750,7 +751,7 @@ static int sec_alg_skcipher_crypto(struct skcipher_request *skreq, if (ret) goto err_free_split_sizes; - if (skreq->src != skreq->dst) { + if (split) { sec_req->len_out = sg_nents(skreq->dst); ret = sec_map_and_split_sg(skreq->dst, split_sizes, steps, &splits_out, &splits_out_nents, @@ -785,8 +786,9 @@ static int sec_alg_skcipher_crypto(struct skcipher_request *skreq, split_sizes[i], skreq->src != skreq->dst, splits_in[i], splits_in_nents[i], - splits_out[i], - splits_out_nents[i], info); + split ? splits_out[i] : NULL, + split ? splits_out_nents[i] : 0, + info); if (IS_ERR(el)) { ret = PTR_ERR(el); goto err_free_elements; @@ -854,7 +856,7 @@ static int sec_alg_skcipher_crypto(struct skcipher_request *skreq, crypto_skcipher_ivsize(atfm), DMA_BIDIRECTIONAL); err_unmap_out_sg: - if (skreq->src != skreq->dst) + if (split) sec_unmap_sg_on_err(skreq->dst, steps, splits_out, splits_out_nents, sec_req->len_out, info->dev); From 0b0cf6af3f3151c26c27e8e51def5527091c3e69 Mon Sep 17 00:00:00 2001 From: John Garry <john.garry@huawei.com> Date: Mon, 5 Nov 2018 20:35:15 +0800 Subject: [PATCH 302/443] crypto: hisilicon - Fix reference after free of memories on error path coccicheck currently warns of the following issues in the driver: drivers/crypto/hisilicon/sec/sec_algs.c:864:51-66: ERROR: reference preceded by free on line 812 drivers/crypto/hisilicon/sec/sec_algs.c:864:40-49: ERROR: reference preceded by free on line 813 drivers/crypto/hisilicon/sec/sec_algs.c:861:8-24: ERROR: reference preceded by free on line 814 drivers/crypto/hisilicon/sec/sec_algs.c:860:41-51: ERROR: reference preceded by free on line 815 drivers/crypto/hisilicon/sec/sec_algs.c:867:7-18: ERROR: reference preceded by free on line 816 It would appear than on certain error paths that we may attempt reference- after-free some memories. This patch fixes those issues. The solution doesn't look perfect, but having same memories free'd possibly from separate functions makes it tricky. Fixes: 915e4e8413da ("crypto: hisilicon - SEC security accelerator driver") Reviewed-by: Jonathan Cameron <Jonathan.Cameron@huawei.com> Cc: <stable@vger.kernel.org> Signed-off-by: John Garry <john.garry@huawei.com> Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au> --- drivers/crypto/hisilicon/sec/sec_algs.c | 21 +++++++++++---------- 1 file changed, 11 insertions(+), 10 deletions(-) diff --git a/drivers/crypto/hisilicon/sec/sec_algs.c b/drivers/crypto/hisilicon/sec/sec_algs.c index 32c6c02cb9ae0..cdc4f9a171d98 100644 --- a/drivers/crypto/hisilicon/sec/sec_algs.c +++ b/drivers/crypto/hisilicon/sec/sec_algs.c @@ -808,13 +808,6 @@ static int sec_alg_skcipher_crypto(struct skcipher_request *skreq, * more refined but this is unlikely to happen so no need. */ - /* Cleanup - all elements in pointer arrays have been coppied */ - kfree(splits_in_nents); - kfree(splits_in); - kfree(splits_out_nents); - kfree(splits_out); - kfree(split_sizes); - /* Grab a big lock for a long time to avoid concurrency issues */ mutex_lock(&queue->queuelock); @@ -829,13 +822,13 @@ static int sec_alg_skcipher_crypto(struct skcipher_request *skreq, (!queue->havesoftqueue || kfifo_avail(&queue->softqueue) > steps)) || !list_empty(&ctx->backlog)) { + ret = -EBUSY; if ((skreq->base.flags & CRYPTO_TFM_REQ_MAY_BACKLOG)) { list_add_tail(&sec_req->backlog_head, &ctx->backlog); mutex_unlock(&queue->queuelock); - return -EBUSY; + goto out; } - ret = -EBUSY; mutex_unlock(&queue->queuelock); goto err_free_elements; } @@ -844,7 +837,15 @@ static int sec_alg_skcipher_crypto(struct skcipher_request *skreq, if (ret) goto err_free_elements; - return -EINPROGRESS; + ret = -EINPROGRESS; +out: + /* Cleanup - all elements in pointer arrays have been copied */ + kfree(splits_in_nents); + kfree(splits_in); + kfree(splits_out_nents); + kfree(splits_out); + kfree(split_sizes); + return ret; err_free_elements: list_for_each_entry_safe(el, temp, &sec_req->elements, head) { From 508a1c4df085a547187eed346f1bfe5e381797f1 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel <ard.biesheuvel@linaro.org> Date: Thu, 8 Nov 2018 23:55:16 +0100 Subject: [PATCH 303/443] crypto: simd - correctly take reqsize of wrapped skcipher into account The simd wrapper's skcipher request context structure consists of a single subrequest whose size is taken from the subordinate skcipher. However, in simd_skcipher_init(), the reqsize that is retrieved is not from the subordinate skcipher but from the cryptd request structure, whose size is completely unrelated to the actual wrapped skcipher. Reported-by: Qian Cai <cai@gmx.us> Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org> Tested-by: Qian Cai <cai@gmx.us> Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au> --- crypto/simd.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/crypto/simd.c b/crypto/simd.c index ea7240be3001b..78e8d037ae2b3 100644 --- a/crypto/simd.c +++ b/crypto/simd.c @@ -124,8 +124,9 @@ static int simd_skcipher_init(struct crypto_skcipher *tfm) ctx->cryptd_tfm = cryptd_tfm; - reqsize = sizeof(struct skcipher_request); - reqsize += crypto_skcipher_reqsize(&cryptd_tfm->base); + reqsize = crypto_skcipher_reqsize(cryptd_skcipher_child(cryptd_tfm)); + reqsize = max(reqsize, crypto_skcipher_reqsize(&cryptd_tfm->base)); + reqsize += sizeof(struct skcipher_request); crypto_skcipher_set_reqsize(tfm, reqsize); From f43f39958beb206b53292801e216d9b8a660f087 Mon Sep 17 00:00:00 2001 From: Eric Biggers <ebiggers@google.com> Date: Sat, 3 Nov 2018 14:56:00 -0700 Subject: [PATCH 304/443] crypto: user - fix leaking uninitialized memory to userspace All bytes of the NETLINK_CRYPTO report structures must be initialized, since they are copied to userspace. The change from strncpy() to strlcpy() broke this. As a minimal fix, change it back. Fixes: 4473710df1f8 ("crypto: user - Prepare for CRYPTO_MAX_ALG_NAME expansion") Cc: <stable@vger.kernel.org> # v4.12+ Signed-off-by: Eric Biggers <ebiggers@google.com> Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au> --- crypto/crypto_user_base.c | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/crypto/crypto_user_base.c b/crypto/crypto_user_base.c index e41f6cc33fff4..784748dbb19f0 100644 --- a/crypto/crypto_user_base.c +++ b/crypto/crypto_user_base.c @@ -84,7 +84,7 @@ static int crypto_report_cipher(struct sk_buff *skb, struct crypto_alg *alg) { struct crypto_report_cipher rcipher; - strlcpy(rcipher.type, "cipher", sizeof(rcipher.type)); + strncpy(rcipher.type, "cipher", sizeof(rcipher.type)); rcipher.blocksize = alg->cra_blocksize; rcipher.min_keysize = alg->cra_cipher.cia_min_keysize; @@ -103,7 +103,7 @@ static int crypto_report_comp(struct sk_buff *skb, struct crypto_alg *alg) { struct crypto_report_comp rcomp; - strlcpy(rcomp.type, "compression", sizeof(rcomp.type)); + strncpy(rcomp.type, "compression", sizeof(rcomp.type)); if (nla_put(skb, CRYPTOCFGA_REPORT_COMPRESS, sizeof(struct crypto_report_comp), &rcomp)) goto nla_put_failure; @@ -117,7 +117,7 @@ static int crypto_report_acomp(struct sk_buff *skb, struct crypto_alg *alg) { struct crypto_report_acomp racomp; - strlcpy(racomp.type, "acomp", sizeof(racomp.type)); + strncpy(racomp.type, "acomp", sizeof(racomp.type)); if (nla_put(skb, CRYPTOCFGA_REPORT_ACOMP, sizeof(struct crypto_report_acomp), &racomp)) @@ -132,7 +132,7 @@ static int crypto_report_akcipher(struct sk_buff *skb, struct crypto_alg *alg) { struct crypto_report_akcipher rakcipher; - strlcpy(rakcipher.type, "akcipher", sizeof(rakcipher.type)); + strncpy(rakcipher.type, "akcipher", sizeof(rakcipher.type)); if (nla_put(skb, CRYPTOCFGA_REPORT_AKCIPHER, sizeof(struct crypto_report_akcipher), &rakcipher)) @@ -147,7 +147,7 @@ static int crypto_report_kpp(struct sk_buff *skb, struct crypto_alg *alg) { struct crypto_report_kpp rkpp; - strlcpy(rkpp.type, "kpp", sizeof(rkpp.type)); + strncpy(rkpp.type, "kpp", sizeof(rkpp.type)); if (nla_put(skb, CRYPTOCFGA_REPORT_KPP, sizeof(struct crypto_report_kpp), &rkpp)) @@ -161,10 +161,10 @@ static int crypto_report_kpp(struct sk_buff *skb, struct crypto_alg *alg) static int crypto_report_one(struct crypto_alg *alg, struct crypto_user_alg *ualg, struct sk_buff *skb) { - strlcpy(ualg->cru_name, alg->cra_name, sizeof(ualg->cru_name)); - strlcpy(ualg->cru_driver_name, alg->cra_driver_name, + strncpy(ualg->cru_name, alg->cra_name, sizeof(ualg->cru_name)); + strncpy(ualg->cru_driver_name, alg->cra_driver_name, sizeof(ualg->cru_driver_name)); - strlcpy(ualg->cru_module_name, module_name(alg->cra_module), + strncpy(ualg->cru_module_name, module_name(alg->cra_module), sizeof(ualg->cru_module_name)); ualg->cru_type = 0; @@ -177,7 +177,7 @@ static int crypto_report_one(struct crypto_alg *alg, if (alg->cra_flags & CRYPTO_ALG_LARVAL) { struct crypto_report_larval rl; - strlcpy(rl.type, "larval", sizeof(rl.type)); + strncpy(rl.type, "larval", sizeof(rl.type)); if (nla_put(skb, CRYPTOCFGA_REPORT_LARVAL, sizeof(struct crypto_report_larval), &rl)) goto nla_put_failure; From 9f4debe38415583086ce814798eeb864aeb39551 Mon Sep 17 00:00:00 2001 From: Corentin Labbe <clabbe@baylibre.com> Date: Sat, 3 Nov 2018 14:56:01 -0700 Subject: [PATCH 305/443] crypto: user - Zeroize whole structure given to user space For preventing uninitialized data to be given to user-space (and so leak potential useful data), the crypto_stat structure must be correctly initialized. Reported-by: Dan Carpenter <dan.carpenter@oracle.com> Fixes: cac5818c25d0 ("crypto: user - Implement a generic crypto statistics") Signed-off-by: Corentin Labbe <clabbe@baylibre.com> [EB: also fix it in crypto_reportstat_one()] [EB: use sizeof(var) rather than sizeof(type)] Signed-off-by: Eric Biggers <ebiggers@google.com> Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au> --- crypto/crypto_user_stat.c | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/crypto/crypto_user_stat.c b/crypto/crypto_user_stat.c index 021ad06bbb628..1dfaa0ccd555b 100644 --- a/crypto/crypto_user_stat.c +++ b/crypto/crypto_user_stat.c @@ -37,6 +37,8 @@ static int crypto_report_aead(struct sk_buff *skb, struct crypto_alg *alg) u64 v64; u32 v32; + memset(&raead, 0, sizeof(raead)); + strncpy(raead.type, "aead", sizeof(raead.type)); v32 = atomic_read(&alg->encrypt_cnt); @@ -65,6 +67,8 @@ static int crypto_report_cipher(struct sk_buff *skb, struct crypto_alg *alg) u64 v64; u32 v32; + memset(&rcipher, 0, sizeof(rcipher)); + strlcpy(rcipher.type, "cipher", sizeof(rcipher.type)); v32 = atomic_read(&alg->encrypt_cnt); @@ -93,6 +97,8 @@ static int crypto_report_comp(struct sk_buff *skb, struct crypto_alg *alg) u64 v64; u32 v32; + memset(&rcomp, 0, sizeof(rcomp)); + strlcpy(rcomp.type, "compression", sizeof(rcomp.type)); v32 = atomic_read(&alg->compress_cnt); rcomp.stat_compress_cnt = v32; @@ -120,6 +126,8 @@ static int crypto_report_acomp(struct sk_buff *skb, struct crypto_alg *alg) u64 v64; u32 v32; + memset(&racomp, 0, sizeof(racomp)); + strlcpy(racomp.type, "acomp", sizeof(racomp.type)); v32 = atomic_read(&alg->compress_cnt); racomp.stat_compress_cnt = v32; @@ -147,6 +155,8 @@ static int crypto_report_akcipher(struct sk_buff *skb, struct crypto_alg *alg) u64 v64; u32 v32; + memset(&rakcipher, 0, sizeof(rakcipher)); + strncpy(rakcipher.type, "akcipher", sizeof(rakcipher.type)); v32 = atomic_read(&alg->encrypt_cnt); rakcipher.stat_encrypt_cnt = v32; @@ -177,6 +187,8 @@ static int crypto_report_kpp(struct sk_buff *skb, struct crypto_alg *alg) struct crypto_stat rkpp; u32 v; + memset(&rkpp, 0, sizeof(rkpp)); + strlcpy(rkpp.type, "kpp", sizeof(rkpp.type)); v = atomic_read(&alg->setsecret_cnt); @@ -203,6 +215,8 @@ static int crypto_report_ahash(struct sk_buff *skb, struct crypto_alg *alg) u64 v64; u32 v32; + memset(&rhash, 0, sizeof(rhash)); + strncpy(rhash.type, "ahash", sizeof(rhash.type)); v32 = atomic_read(&alg->hash_cnt); @@ -227,6 +241,8 @@ static int crypto_report_shash(struct sk_buff *skb, struct crypto_alg *alg) u64 v64; u32 v32; + memset(&rhash, 0, sizeof(rhash)); + strncpy(rhash.type, "shash", sizeof(rhash.type)); v32 = atomic_read(&alg->hash_cnt); @@ -251,6 +267,8 @@ static int crypto_report_rng(struct sk_buff *skb, struct crypto_alg *alg) u64 v64; u32 v32; + memset(&rrng, 0, sizeof(rrng)); + strncpy(rrng.type, "rng", sizeof(rrng.type)); v32 = atomic_read(&alg->generate_cnt); @@ -275,6 +293,8 @@ static int crypto_reportstat_one(struct crypto_alg *alg, struct crypto_user_alg *ualg, struct sk_buff *skb) { + memset(ualg, 0, sizeof(*ualg)); + strlcpy(ualg->cru_name, alg->cra_name, sizeof(ualg->cru_name)); strlcpy(ualg->cru_driver_name, alg->cra_driver_name, sizeof(ualg->cru_driver_name)); @@ -291,6 +311,7 @@ static int crypto_reportstat_one(struct crypto_alg *alg, if (alg->cra_flags & CRYPTO_ALG_LARVAL) { struct crypto_stat rl; + memset(&rl, 0, sizeof(rl)); strlcpy(rl.type, "larval", sizeof(rl.type)); if (nla_put(skb, CRYPTOCFGA_STAT_LARVAL, sizeof(struct crypto_stat), &rl)) From 595b0674ce781e38522097b18718ce3c3bffc1a1 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko <andriy.shevchenko@linux.intel.com> Date: Mon, 5 Nov 2018 18:33:34 +0200 Subject: [PATCH 306/443] MAINTAINERS: Add tree link for Intel pin control driver Intel pin control driver gets its own tree. Update MAINTAINERS accordingly. Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com> Acked-by: Mika Westerberg <mika.westerberg@linux.intel.com> Signed-off-by: Linus Walleij <linus.walleij@linaro.org> --- MAINTAINERS | 1 + 1 file changed, 1 insertion(+) diff --git a/MAINTAINERS b/MAINTAINERS index f4855974f3250..570a8c593b52f 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -11730,6 +11730,7 @@ F: Documentation/devicetree/bindings/pinctrl/fsl,* PIN CONTROLLER - INTEL M: Mika Westerberg <mika.westerberg@linux.intel.com> M: Andy Shevchenko <andriy.shevchenko@linux.intel.com> +T: git git://git.kernel.org/pub/scm/linux/kernel/git/pinctrl/intel.git S: Maintained F: drivers/pinctrl/intel/ From 10283ea525d30f2e99828978fd04d8427876a7ad Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher <agruenba@redhat.com> Date: Mon, 5 Nov 2018 22:57:24 +0000 Subject: [PATCH 307/443] gfs2: Put bitmap buffers in put_super gfs2_put_super calls gfs2_clear_rgrpd to destroy the gfs2_rgrpd objects attached to the resource group glocks. That function should release the buffers attached to the gfs2_bitmap objects (bi_bh), but the call to gfs2_rgrp_brelse for doing that is missing. When gfs2_releasepage later runs across these buffers which are still referenced, it refuses to free them. This causes the pages the buffers are attached to to remain referenced as well. With enough mount/unmount cycles, the system will eventually run out of memory. Fix this by adding the missing call to gfs2_rgrp_brelse in gfs2_clear_rgrpd. (Also fix a gfs2_rgrp_relse -> gfs2_rgrp_brelse typo in a comment.) Fixes: 39b0f1e92908 ("GFS2: Don't brelse rgrp buffer_heads every allocation") Cc: stable@vger.kernel.org # v4.2+ Signed-off-by: Andreas Gruenbacher <agruenba@redhat.com> --- fs/gfs2/rgrp.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c index ffe3032b1043d..b08a530433adf 100644 --- a/fs/gfs2/rgrp.c +++ b/fs/gfs2/rgrp.c @@ -733,6 +733,7 @@ void gfs2_clear_rgrpd(struct gfs2_sbd *sdp) if (gl) { glock_clear_object(gl, rgd); + gfs2_rgrp_brelse(rgd); gfs2_glock_put(gl); } @@ -1174,7 +1175,7 @@ static u32 count_unlinked(struct gfs2_rgrpd *rgd) * @rgd: the struct gfs2_rgrpd describing the RG to read in * * Read in all of a Resource Group's header and bitmap blocks. - * Caller must eventually call gfs2_rgrp_relse() to free the bitmaps. + * Caller must eventually call gfs2_rgrp_brelse() to free the bitmaps. * * Returns: errno */ From e7445ceddfc220c1aede6d42758a5acb8844e9c3 Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher <agruenba@redhat.com> Date: Thu, 8 Nov 2018 20:14:29 +0000 Subject: [PATCH 308/443] gfs2: Fix metadata read-ahead during truncate (2) The previous attempt to fix for metadata read-ahead during truncate was incorrect: for files with a height > 2 (1006989312 bytes with a block size of 4096 bytes), read-ahead requests were not being issued for some of the indirect blocks discovered while walking the metadata tree, leading to significant slow-downs when deleting large files. Fix that. In addition, only issue read-ahead requests in the first pass through the meta-data tree, while deallocating data blocks. Fixes: c3ce5aa9b0 ("gfs2: Fix metadata read-ahead during truncate") Cc: stable@vger.kernel.org # v4.16+ Signed-off-by: Andreas Gruenbacher <agruenba@redhat.com> --- fs/gfs2/bmap.c | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/fs/gfs2/bmap.c b/fs/gfs2/bmap.c index 5f3ea07ef5e23..38d88fcb69883 100644 --- a/fs/gfs2/bmap.c +++ b/fs/gfs2/bmap.c @@ -1908,10 +1908,16 @@ static int punch_hole(struct gfs2_inode *ip, u64 offset, u64 length) if (ret < 0) goto out; - /* issue read-ahead on metadata */ - if (mp.mp_aheight > 1) { - for (; ret > 1; ret--) { - metapointer_range(&mp, mp.mp_aheight - ret, + /* On the first pass, issue read-ahead on metadata. */ + if (mp.mp_aheight > 1 && strip_h == ip->i_height - 1) { + unsigned int height = mp.mp_aheight - 1; + + /* No read-ahead for data blocks. */ + if (mp.mp_aheight - 1 == strip_h) + height--; + + for (; height >= mp.mp_aheight - ret; height--) { + metapointer_range(&mp, height, start_list, start_aligned, end_list, end_aligned, &start, &end); From 21d3bbdd4c342f16eac8d70893e45cdfa3381a1e Mon Sep 17 00:00:00 2001 From: Sagi Grimberg <sagi@grimberg.me> Date: Fri, 2 Nov 2018 16:12:21 -0700 Subject: [PATCH 309/443] nvmet: don't try to add ns to p2p map unless it actually uses it Even without CONFIG_P2PDMA this results in a error print: nvmet: no peer-to-peer memory is available that's supported by rxe0 and /dev/nullb0 Fixes: c6925093d0b2 ("nvmet: Optionally use PCI P2P memory") Signed-off-by: Sagi Grimberg <sagi@grimberg.me> Reviewed-by: Logan Gunthorpe <logang@deltatee.com> Signed-off-by: Christoph Hellwig <hch@lst.de> Signed-off-by: Jens Axboe <axboe@kernel.dk> --- drivers/nvme/target/core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/nvme/target/core.c b/drivers/nvme/target/core.c index f4efe289dc7bc..a5f9bbce863f4 100644 --- a/drivers/nvme/target/core.c +++ b/drivers/nvme/target/core.c @@ -420,7 +420,7 @@ static void nvmet_p2pmem_ns_add_p2p(struct nvmet_ctrl *ctrl, struct pci_dev *p2p_dev; int ret; - if (!ctrl->p2p_client) + if (!ctrl->p2p_client || !ns->use_p2pmem) return; if (ns->p2p_dev) { From 8f676b8508c250bbe255096522fdefb73f1ea0b9 Mon Sep 17 00:00:00 2001 From: Sagi Grimberg <sagi@grimberg.me> Date: Fri, 2 Nov 2018 11:22:13 -0700 Subject: [PATCH 310/443] nvme: make sure ns head inherits underlying device limits Whenever we update ns_head info, we need to make sure it is still compatible with all underlying backing devices because although nvme multipath doesn't have any explicit use of these limits, other devices can still be stacked on top of it which may rely on the underlying limits. Start with unlimited stacking limits, and every info update iterate over siblings and adjust queue limits. Signed-off-by: Sagi Grimberg <sagi@grimberg.me> Signed-off-by: Christoph Hellwig <hch@lst.de> Signed-off-by: Jens Axboe <axboe@kernel.dk> --- drivers/nvme/host/core.c | 4 +++- drivers/nvme/host/multipath.c | 1 + 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index 2e65be8b1387a..559d567693b8d 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -1519,8 +1519,10 @@ static void __nvme_revalidate_disk(struct gendisk *disk, struct nvme_id_ns *id) if (ns->ndev) nvme_nvm_update_nvm_info(ns); #ifdef CONFIG_NVME_MULTIPATH - if (ns->head->disk) + if (ns->head->disk) { nvme_update_disk_info(ns->head->disk, ns, id); + blk_queue_stack_limits(ns->head->disk->queue, ns->queue); + } #endif } diff --git a/drivers/nvme/host/multipath.c b/drivers/nvme/host/multipath.c index 5e3cc8c59a394..9901afd804ce3 100644 --- a/drivers/nvme/host/multipath.c +++ b/drivers/nvme/host/multipath.c @@ -285,6 +285,7 @@ int nvme_mpath_alloc_disk(struct nvme_ctrl *ctrl, struct nvme_ns_head *head) blk_queue_flag_set(QUEUE_FLAG_NONROT, q); /* set to a default value for 512 until disk is validated */ blk_queue_logical_block_size(q, 512); + blk_set_stacking_limits(&q->limits); /* we need to propagate up the VMC settings */ if (ctrl->vwc & NVME_CTRL_VWC_PRESENT) From d39aa4979219ca3d61c492f7460f1032b97b9ef2 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig <hch@lst.de> Date: Wed, 7 Nov 2018 09:20:25 +0100 Subject: [PATCH 311/443] Revert "nvmet-rdma: use a private workqueue for delete" This reverts commit 2acf70ade79d26b97611a8df52eb22aa33814cd4. The commit never really fixed the intended issue and caused all kinds of other issues, including a use before initialization. Suggested-by: Sagi Grimberg <sagi@grimberg.me> Signed-off-by: Christoph Hellwig <hch@lst.de> Signed-off-by: Jens Axboe <axboe@kernel.dk> --- drivers/nvme/target/rdma.c | 19 ++++--------------- 1 file changed, 4 insertions(+), 15 deletions(-) diff --git a/drivers/nvme/target/rdma.c b/drivers/nvme/target/rdma.c index ddce100be57a4..3f7971d3706d9 100644 --- a/drivers/nvme/target/rdma.c +++ b/drivers/nvme/target/rdma.c @@ -122,7 +122,6 @@ struct nvmet_rdma_device { int inline_page_count; }; -static struct workqueue_struct *nvmet_rdma_delete_wq; static bool nvmet_rdma_use_srq; module_param_named(use_srq, nvmet_rdma_use_srq, bool, 0444); MODULE_PARM_DESC(use_srq, "Use shared receive queue."); @@ -1274,12 +1273,12 @@ static int nvmet_rdma_queue_connect(struct rdma_cm_id *cm_id, if (queue->host_qid == 0) { /* Let inflight controller teardown complete */ - flush_workqueue(nvmet_rdma_delete_wq); + flush_scheduled_work(); } ret = nvmet_rdma_cm_accept(cm_id, queue, &event->param.conn); if (ret) { - queue_work(nvmet_rdma_delete_wq, &queue->release_work); + schedule_work(&queue->release_work); /* Destroying rdma_cm id is not needed here */ return 0; } @@ -1344,7 +1343,7 @@ static void __nvmet_rdma_queue_disconnect(struct nvmet_rdma_queue *queue) if (disconnect) { rdma_disconnect(queue->cm_id); - queue_work(nvmet_rdma_delete_wq, &queue->release_work); + schedule_work(&queue->release_work); } } @@ -1374,7 +1373,7 @@ static void nvmet_rdma_queue_connect_fail(struct rdma_cm_id *cm_id, mutex_unlock(&nvmet_rdma_queue_mutex); pr_err("failed to connect queue %d\n", queue->idx); - queue_work(nvmet_rdma_delete_wq, &queue->release_work); + schedule_work(&queue->release_work); } /** @@ -1656,17 +1655,8 @@ static int __init nvmet_rdma_init(void) if (ret) goto err_ib_client; - nvmet_rdma_delete_wq = alloc_workqueue("nvmet-rdma-delete-wq", - WQ_UNBOUND | WQ_MEM_RECLAIM | WQ_SYSFS, 0); - if (!nvmet_rdma_delete_wq) { - ret = -ENOMEM; - goto err_unreg_transport; - } - return 0; -err_unreg_transport: - nvmet_unregister_transport(&nvmet_rdma_ops); err_ib_client: ib_unregister_client(&nvmet_rdma_ib_client); return ret; @@ -1674,7 +1664,6 @@ static int __init nvmet_rdma_init(void) static void __exit nvmet_rdma_exit(void) { - destroy_workqueue(nvmet_rdma_delete_wq); nvmet_unregister_transport(&nvmet_rdma_ops); ib_unregister_client(&nvmet_rdma_ib_client); WARN_ON_ONCE(!list_empty(&nvmet_rdma_queue_list)); From 1adfc5e4136f5967d591c399aff95b3b035f16b7 Mon Sep 17 00:00:00 2001 From: Ming Lei <ming.lei@redhat.com> Date: Mon, 29 Oct 2018 20:57:17 +0800 Subject: [PATCH 312/443] block: make sure discard bio is aligned with logical block size Obviously the created discard bio has to be aligned with logical block size. This patch introduces the helper of bio_allowed_max_sectors() for this purpose. Cc: stable@vger.kernel.org Cc: Mike Snitzer <snitzer@redhat.com> Cc: Christoph Hellwig <hch@lst.de> Cc: Xiao Ni <xni@redhat.com> Cc: Mariusz Dabrowski <mariusz.dabrowski@intel.com> Fixes: 744889b7cbb56a6 ("block: don't deal with discard limit in blkdev_issue_discard()") Fixes: a22c4d7e34402cc ("block: re-add discard_granularity and alignment checks") Reported-by: Rui Salvaterra <rsalvaterra@gmail.com> Tested-by: Rui Salvaterra <rsalvaterra@gmail.com> Signed-off-by: Ming Lei <ming.lei@redhat.com> Signed-off-by: Jens Axboe <axboe@kernel.dk> --- block/blk-lib.c | 3 +-- block/blk-merge.c | 3 ++- block/blk.h | 10 ++++++++++ 3 files changed, 13 insertions(+), 3 deletions(-) diff --git a/block/blk-lib.c b/block/blk-lib.c index 76f867ea9a9b9..d56fd159d2e8f 100644 --- a/block/blk-lib.c +++ b/block/blk-lib.c @@ -57,8 +57,7 @@ int __blkdev_issue_discard(struct block_device *bdev, sector_t sector, if (!req_sects) goto fail; - if (req_sects > UINT_MAX >> 9) - req_sects = UINT_MAX >> 9; + req_sects = min(req_sects, bio_allowed_max_sectors(q)); end_sect = sector + req_sects; diff --git a/block/blk-merge.c b/block/blk-merge.c index 208658a901c65..e7696c47489ad 100644 --- a/block/blk-merge.c +++ b/block/blk-merge.c @@ -90,7 +90,8 @@ static struct bio *blk_bio_discard_split(struct request_queue *q, /* Zero-sector (unknown) and one-sector granularities are the same. */ granularity = max(q->limits.discard_granularity >> 9, 1U); - max_discard_sectors = min(q->limits.max_discard_sectors, UINT_MAX >> 9); + max_discard_sectors = min(q->limits.max_discard_sectors, + bio_allowed_max_sectors(q)); max_discard_sectors -= max_discard_sectors % granularity; if (unlikely(!max_discard_sectors)) { diff --git a/block/blk.h b/block/blk.h index c85e53f21cdd8..0089fefdf771d 100644 --- a/block/blk.h +++ b/block/blk.h @@ -395,6 +395,16 @@ static inline unsigned long blk_rq_deadline(struct request *rq) return rq->__deadline & ~0x1UL; } +/* + * The max size one bio can handle is UINT_MAX becasue bvec_iter.bi_size + * is defined as 'unsigned int', meantime it has to aligned to with logical + * block size which is the minimum accepted unit by hardware. + */ +static inline unsigned int bio_allowed_max_sectors(struct request_queue *q) +{ + return round_down(UINT_MAX, queue_logical_block_size(q)) >> 9; +} + /* * Internal io_context interface */ From ba5d73851e71847ba7f7f4c27a1a6e1f5ab91c79 Mon Sep 17 00:00:00 2001 From: Ming Lei <ming.lei@redhat.com> Date: Mon, 29 Oct 2018 20:57:18 +0800 Subject: [PATCH 313/443] block: cleanup __blkdev_issue_discard() Cleanup __blkdev_issue_discard() a bit: - remove local variable of 'end_sect' - remove code block of 'fail' Cc: Mike Snitzer <snitzer@redhat.com> Cc: Christoph Hellwig <hch@lst.de> Cc: Xiao Ni <xni@redhat.com> Cc: Mariusz Dabrowski <mariusz.dabrowski@intel.com> Tested-by: Rui Salvaterra <rsalvaterra@gmail.com> Signed-off-by: Ming Lei <ming.lei@redhat.com> Signed-off-by: Jens Axboe <axboe@kernel.dk> --- block/blk-lib.c | 23 ++++++----------------- 1 file changed, 6 insertions(+), 17 deletions(-) diff --git a/block/blk-lib.c b/block/blk-lib.c index d56fd159d2e8f..d58d5d87dd887 100644 --- a/block/blk-lib.c +++ b/block/blk-lib.c @@ -51,15 +51,12 @@ int __blkdev_issue_discard(struct block_device *bdev, sector_t sector, if ((sector | nr_sects) & bs_mask) return -EINVAL; - while (nr_sects) { - unsigned int req_sects = nr_sects; - sector_t end_sect; - - if (!req_sects) - goto fail; - req_sects = min(req_sects, bio_allowed_max_sectors(q)); + if (!nr_sects) + return -EINVAL; - end_sect = sector + req_sects; + while (nr_sects) { + unsigned int req_sects = min_t(unsigned int, nr_sects, + bio_allowed_max_sectors(q)); bio = blk_next_bio(bio, 0, gfp_mask); bio->bi_iter.bi_sector = sector; @@ -67,8 +64,8 @@ int __blkdev_issue_discard(struct block_device *bdev, sector_t sector, bio_set_op_attrs(bio, op, 0); bio->bi_iter.bi_size = req_sects << 9; + sector += req_sects; nr_sects -= req_sects; - sector = end_sect; /* * We can loop for a long time in here, if someone does @@ -81,14 +78,6 @@ int __blkdev_issue_discard(struct block_device *bdev, sector_t sector, *biop = bio; return 0; - -fail: - if (bio) { - submit_bio_wait(bio); - bio_put(bio); - } - *biop = NULL; - return -EOPNOTSUPP; } EXPORT_SYMBOL(__blkdev_issue_discard); From 34ffec60b27aa81d04e274e71e4c6ef740f75fc7 Mon Sep 17 00:00:00 2001 From: Ming Lei <ming.lei@redhat.com> Date: Mon, 29 Oct 2018 20:57:19 +0800 Subject: [PATCH 314/443] block: make sure writesame bio is aligned with logical block size Obviously the created writesame bio has to be aligned with logical block size, and use bio_allowed_max_sectors() to retrieve this number. Cc: stable@vger.kernel.org Cc: Mike Snitzer <snitzer@redhat.com> Cc: Christoph Hellwig <hch@lst.de> Cc: Xiao Ni <xni@redhat.com> Cc: Mariusz Dabrowski <mariusz.dabrowski@intel.com> Fixes: b49a0871be31a745b2ef ("block: remove split code in blkdev_issue_{discard,write_same}") Tested-by: Rui Salvaterra <rsalvaterra@gmail.com> Signed-off-by: Ming Lei <ming.lei@redhat.com> Signed-off-by: Jens Axboe <axboe@kernel.dk> --- block/blk-lib.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/block/blk-lib.c b/block/blk-lib.c index d58d5d87dd887..e8b3bb9bf3759 100644 --- a/block/blk-lib.c +++ b/block/blk-lib.c @@ -149,7 +149,7 @@ static int __blkdev_issue_write_same(struct block_device *bdev, sector_t sector, return -EOPNOTSUPP; /* Ensure that max_write_same_sectors doesn't overflow bi_size */ - max_write_same_sectors = UINT_MAX >> 9; + max_write_same_sectors = bio_allowed_max_sectors(q); while (nr_sects) { bio = blk_next_bio(bio, 1, gfp_mask); From 7fabaf303458fcabb694999d6fa772cc13d4e217 Mon Sep 17 00:00:00 2001 From: Miklos Szeredi <mszeredi@redhat.com> Date: Fri, 9 Nov 2018 15:52:16 +0100 Subject: [PATCH 315/443] fuse: fix leaked notify reply fuse_request_send_notify_reply() may fail if the connection was reset for some reason (e.g. fs was unmounted). Don't leak request reference in this case. Besides leaking memory, this resulted in fc->num_waiting not being decremented and hence fuse_wait_aborted() left in a hanging and unkillable state. Fixes: 2d45ba381a74 ("fuse: add retrieve request") Fixes: b8f95e5d13f5 ("fuse: umount should wait for all requests") Reported-and-tested-by: syzbot+6339eda9cb4ebbc4c37b@syzkaller.appspotmail.com Signed-off-by: Miklos Szeredi <mszeredi@redhat.com> Cc: <stable@vger.kernel.org> #v2.6.36 --- fs/fuse/dev.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c index ae813e6099321..6fe330cc97091 100644 --- a/fs/fuse/dev.c +++ b/fs/fuse/dev.c @@ -1768,8 +1768,10 @@ static int fuse_retrieve(struct fuse_conn *fc, struct inode *inode, req->in.args[1].size = total_len; err = fuse_request_send_notify_reply(fc, req, outarg->notify_unique); - if (err) + if (err) { fuse_retrieve_end(fc, req); + fuse_put_request(fc, req); + } return err; } From 2d84a2d19b6150c6dbac1e6ebad9c82e4c123772 Mon Sep 17 00:00:00 2001 From: Miklos Szeredi <mszeredi@redhat.com> Date: Fri, 9 Nov 2018 15:52:16 +0100 Subject: [PATCH 316/443] fuse: fix possibly missed wake-up after abort In current fuse_drop_waiting() implementation it's possible that fuse_wait_aborted() will not be woken up in the unlikely case that fuse_abort_conn() + fuse_wait_aborted() runs in between checking fc->connected and calling atomic_dec(&fc->num_waiting). Do the atomic_dec_and_test() unconditionally, which also provides the necessary barrier against reordering with the fc->connected check. The explicit smp_mb() in fuse_wait_aborted() is not actually needed, since the spin_unlock() in fuse_abort_conn() provides the necessary RELEASE barrier after resetting fc->connected. However, this is not a performance sensitive path, and adding the explicit barrier makes it easier to document. Signed-off-by: Miklos Szeredi <mszeredi@redhat.com> Fixes: b8f95e5d13f5 ("fuse: umount should wait for all requests") Cc: <stable@vger.kernel.org> #v4.19 --- fs/fuse/dev.c | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c index 6fe330cc97091..a5e516a40e7a3 100644 --- a/fs/fuse/dev.c +++ b/fs/fuse/dev.c @@ -165,9 +165,13 @@ static bool fuse_block_alloc(struct fuse_conn *fc, bool for_background) static void fuse_drop_waiting(struct fuse_conn *fc) { - if (fc->connected) { - atomic_dec(&fc->num_waiting); - } else if (atomic_dec_and_test(&fc->num_waiting)) { + /* + * lockess check of fc->connected is okay, because atomic_dec_and_test() + * provides a memory barrier mached with the one in fuse_wait_aborted() + * to ensure no wake-up is missed. + */ + if (atomic_dec_and_test(&fc->num_waiting) && + !READ_ONCE(fc->connected)) { /* wake up aborters */ wake_up_all(&fc->blocked_waitq); } @@ -2221,6 +2225,8 @@ EXPORT_SYMBOL_GPL(fuse_abort_conn); void fuse_wait_aborted(struct fuse_conn *fc) { + /* matches implicit memory barrier in fuse_drop_waiting() */ + smp_mb(); wait_event(fc->blocked_waitq, atomic_read(&fc->num_waiting) == 0); } From ebacb81273599555a7a19f7754a1451206a5fc4f Mon Sep 17 00:00:00 2001 From: Lukas Czerner <lczerner@redhat.com> Date: Fri, 9 Nov 2018 14:51:46 +0100 Subject: [PATCH 317/443] fuse: fix use-after-free in fuse_direct_IO() In async IO blocking case the additional reference to the io is taken for it to survive fuse_aio_complete(). In non blocking case this additional reference is not needed, however we still reference io to figure out whether to wait for completion or not. This is wrong and will lead to use-after-free. Fix it by storing blocking information in separate variable. This was spotted by KASAN when running generic/208 fstest. Signed-off-by: Lukas Czerner <lczerner@redhat.com> Reported-by: Zorro Lang <zlang@redhat.com> Signed-off-by: Miklos Szeredi <mszeredi@redhat.com> Fixes: 744742d692e3 ("fuse: Add reference counting for fuse_io_priv") Cc: <stable@vger.kernel.org> # v4.6 --- fs/fuse/file.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/fs/fuse/file.c b/fs/fuse/file.c index cc2121b37bf5f..b52f9baaa3e7b 100644 --- a/fs/fuse/file.c +++ b/fs/fuse/file.c @@ -2924,10 +2924,12 @@ fuse_direct_IO(struct kiocb *iocb, struct iov_iter *iter) } if (io->async) { + bool blocking = io->blocking; + fuse_aio_complete(io, ret < 0 ? ret : 0, -1); /* we have a non-extending, async request, so return */ - if (!io->blocking) + if (!blocking) return -EIOCBQUEUED; wait_for_completion(&wait); From d3132b3860f6cf35ff7609a76bbcdbb814bd027c Mon Sep 17 00:00:00 2001 From: Juergen Gross <jgross@suse.com> Date: Thu, 8 Nov 2018 08:35:06 +0100 Subject: [PATCH 318/443] xen: fix xen_qlock_wait() Commit a856531951dc80 ("xen: make xen_qlock_wait() nestable") introduced a regression for Xen guests running fully virtualized (HVM or PVH mode). The Xen hypervisor wouldn't return from the poll hypercall with interrupts disabled in case of an interrupt (for PV guests it does). So instead of disabling interrupts in xen_qlock_wait() use a nesting counter to avoid calling xen_clear_irq_pending() in case xen_qlock_wait() is nested. Fixes: a856531951dc80 ("xen: make xen_qlock_wait() nestable") Cc: stable@vger.kernel.org Reported-by: Sander Eikelenboom <linux@eikelenboom.it> Signed-off-by: Juergen Gross <jgross@suse.com> Reviewed-by: Boris Ostrovsky <boris.ostrovsky@oracle.com> Tested-by: Sander Eikelenboom <linux@eikelenboom.it> Signed-off-by: Juergen Gross <jgross@suse.com> --- arch/x86/xen/spinlock.c | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/arch/x86/xen/spinlock.c b/arch/x86/xen/spinlock.c index 441c882621698..1c8a8816a402a 100644 --- a/arch/x86/xen/spinlock.c +++ b/arch/x86/xen/spinlock.c @@ -9,6 +9,7 @@ #include <linux/log2.h> #include <linux/gfp.h> #include <linux/slab.h> +#include <linux/atomic.h> #include <asm/paravirt.h> #include <asm/qspinlock.h> @@ -21,6 +22,7 @@ static DEFINE_PER_CPU(int, lock_kicker_irq) = -1; static DEFINE_PER_CPU(char *, irq_name); +static DEFINE_PER_CPU(atomic_t, xen_qlock_wait_nest); static bool xen_pvspin = true; static void xen_qlock_kick(int cpu) @@ -39,25 +41,25 @@ static void xen_qlock_kick(int cpu) */ static void xen_qlock_wait(u8 *byte, u8 val) { - unsigned long flags; int irq = __this_cpu_read(lock_kicker_irq); + atomic_t *nest_cnt = this_cpu_ptr(&xen_qlock_wait_nest); /* If kicker interrupts not initialized yet, just spin */ if (irq == -1 || in_nmi()) return; - /* Guard against reentry. */ - local_irq_save(flags); + /* Detect reentry. */ + atomic_inc(nest_cnt); - /* If irq pending already clear it. */ - if (xen_test_irq_pending(irq)) { + /* If irq pending already and no nested call clear it. */ + if (atomic_read(nest_cnt) == 1 && xen_test_irq_pending(irq)) { xen_clear_irq_pending(irq); } else if (READ_ONCE(*byte) == val) { /* Block until irq becomes pending (or a spurious wakeup) */ xen_poll_irq(irq); } - local_irq_restore(flags); + atomic_dec(nest_cnt); } static irqreturn_t dummy_handler(int irq, void *dev_id) From 3941552aec1e04d63999988a057ae09a1c56ebeb Mon Sep 17 00:00:00 2001 From: Juergen Gross <jgross@suse.com> Date: Thu, 1 Nov 2018 13:33:07 +0100 Subject: [PATCH 319/443] xen: remove size limit of privcmd-buf mapping interface Currently the size of hypercall buffers allocated via /dev/xen/hypercall is limited to a default of 64 memory pages. For live migration of guests this might be too small as the page dirty bitmask needs to be sized according to the size of the guest. This means migrating a 8GB sized guest is already exhausting the default buffer size for the dirty bitmap. There is no sensible way to set a sane limit, so just remove it completely. The device node's usage is limited to root anyway, so there is no additional DOS scenario added by allowing unlimited buffers. While at it make the error path for the -ENOMEM case a little bit cleaner by setting n_pages to the number of successfully allocated pages instead of the target size. Fixes: c51b3c639e01f2 ("xen: add new hypercall buffer mapping device") Cc: <stable@vger.kernel.org> #4.18 Signed-off-by: Juergen Gross <jgross@suse.com> Reviewed-by: Boris Ostrovsky <boris.ostrovsky@oracle.com> Signed-off-by: Juergen Gross <jgross@suse.com> --- drivers/xen/privcmd-buf.c | 22 ++++------------------ 1 file changed, 4 insertions(+), 18 deletions(-) diff --git a/drivers/xen/privcmd-buf.c b/drivers/xen/privcmd-buf.c index df1ed37c3269e..de01a6d0059dc 100644 --- a/drivers/xen/privcmd-buf.c +++ b/drivers/xen/privcmd-buf.c @@ -21,15 +21,9 @@ MODULE_LICENSE("GPL"); -static unsigned int limit = 64; -module_param(limit, uint, 0644); -MODULE_PARM_DESC(limit, "Maximum number of pages that may be allocated by " - "the privcmd-buf device per open file"); - struct privcmd_buf_private { struct mutex lock; struct list_head list; - unsigned int allocated; }; struct privcmd_buf_vma_private { @@ -60,13 +54,10 @@ static void privcmd_buf_vmapriv_free(struct privcmd_buf_vma_private *vma_priv) { unsigned int i; - vma_priv->file_priv->allocated -= vma_priv->n_pages; - list_del(&vma_priv->list); for (i = 0; i < vma_priv->n_pages; i++) - if (vma_priv->pages[i]) - __free_page(vma_priv->pages[i]); + __free_page(vma_priv->pages[i]); kfree(vma_priv); } @@ -146,8 +137,7 @@ static int privcmd_buf_mmap(struct file *file, struct vm_area_struct *vma) unsigned int i; int ret = 0; - if (!(vma->vm_flags & VM_SHARED) || count > limit || - file_priv->allocated + count > limit) + if (!(vma->vm_flags & VM_SHARED)) return -EINVAL; vma_priv = kzalloc(sizeof(*vma_priv) + count * sizeof(void *), @@ -155,19 +145,15 @@ static int privcmd_buf_mmap(struct file *file, struct vm_area_struct *vma) if (!vma_priv) return -ENOMEM; - vma_priv->n_pages = count; - count = 0; - for (i = 0; i < vma_priv->n_pages; i++) { + for (i = 0; i < count; i++) { vma_priv->pages[i] = alloc_page(GFP_KERNEL | __GFP_ZERO); if (!vma_priv->pages[i]) break; - count++; + vma_priv->n_pages++; } mutex_lock(&file_priv->lock); - file_priv->allocated += count; - vma_priv->file_priv = file_priv; vma_priv->users = 1; From ab214c48387aaaadcf8246b4b00d8b78286fde1b Mon Sep 17 00:00:00 2001 From: Vignesh R <vigneshr@ti.com> Date: Fri, 9 Nov 2018 16:44:10 +0530 Subject: [PATCH 320/443] dt-bindings: i2c: omap: Add new compatible for AM654 SoCs AM654 SoCs have same I2C IP as OMAP SoCs. Add new compatible to handle AM654 SoCs. While at that reformat the existing compatible list for older SoCs to list one valid compatible per line. Signed-off-by: Vignesh R <vigneshr@ti.com> Reviewed-by: Rob Herring <robh@kernel.org> Reviewed-by: Grygorii Strashko <grygorii.strashko@ti.com> Signed-off-by: Wolfram Sang <wsa@the-dreams.de> --- Documentation/devicetree/bindings/i2c/i2c-omap.txt | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/Documentation/devicetree/bindings/i2c/i2c-omap.txt b/Documentation/devicetree/bindings/i2c/i2c-omap.txt index 7e49839d41249..4b90ba9f31b70 100644 --- a/Documentation/devicetree/bindings/i2c/i2c-omap.txt +++ b/Documentation/devicetree/bindings/i2c/i2c-omap.txt @@ -1,8 +1,12 @@ I2C for OMAP platforms Required properties : -- compatible : Must be "ti,omap2420-i2c", "ti,omap2430-i2c", "ti,omap3-i2c" - or "ti,omap4-i2c" +- compatible : Must be + "ti,omap2420-i2c" for OMAP2420 SoCs + "ti,omap2430-i2c" for OMAP2430 SoCs + "ti,omap3-i2c" for OMAP3 SoCs + "ti,omap4-i2c" for OMAP4+ SoCs + "ti,am654-i2c", "ti,omap4-i2c" for AM654 SoCs - ti,hwmods : Must be "i2c<n>", n being the instance number (1-based) - #address-cells = <1>; - #size-cells = <0>; From 5b277402deac0691226a947df71c581686bd4020 Mon Sep 17 00:00:00 2001 From: Vignesh R <vigneshr@ti.com> Date: Fri, 9 Nov 2018 16:44:11 +0530 Subject: [PATCH 321/443] i2c: omap: Enable for ARCH_K3 Allow I2C_OMAP to be built for K3 platforms. Signed-off-by: Vignesh R <vigneshr@ti.com> Reviewed-by: Grygorii Strashko <grygorii.strashko@ti.com> Signed-off-by: Wolfram Sang <wsa@the-dreams.de> --- drivers/i2c/busses/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/i2c/busses/Kconfig b/drivers/i2c/busses/Kconfig index 56ccb1ea7da5b..77dc94b440117 100644 --- a/drivers/i2c/busses/Kconfig +++ b/drivers/i2c/busses/Kconfig @@ -752,7 +752,7 @@ config I2C_OCORES config I2C_OMAP tristate "OMAP I2C adapter" - depends on ARCH_OMAP + depends on ARCH_OMAP || ARCH_K3 default y if MACH_OMAP_H3 || MACH_OMAP_OSK help If you say yes to this option, support will be included for the From 350784e79230f85e9007c4c7d8b2259717d67a4d Mon Sep 17 00:00:00 2001 From: Vignesh R <vigneshr@ti.com> Date: Fri, 9 Nov 2018 16:44:12 +0530 Subject: [PATCH 322/443] MAINTAINERS: Add entry for i2c-omap driver Add separate entry for i2c-omap and add my name as maintainer for this driver. Signed-off-by: Vignesh R <vigneshr@ti.com> Acked-by: Tony Lindgren <tony@atomide.com> Signed-off-by: Wolfram Sang <wsa@the-dreams.de> --- MAINTAINERS | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/MAINTAINERS b/MAINTAINERS index f4855974f3250..259fe9445d2fb 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -10784,6 +10784,14 @@ L: linux-omap@vger.kernel.org S: Maintained F: arch/arm/mach-omap2/omap_hwmod.* +OMAP I2C DRIVER +M: Vignesh R <vigneshr@ti.com> +L: linux-omap@vger.kernel.org +L: linux-i2c@vger.kernel.org +S: Maintained +F: Documentation/devicetree/bindings/i2c/i2c-omap.txt +F: drivers/i2c/busses/i2c-omap.c + OMAP IMAGING SUBSYSTEM (OMAP3 ISP and OMAP4 ISS) M: Laurent Pinchart <laurent.pinchart@ideasonboard.com> L: linux-media@vger.kernel.org From 848bd3f3de9d44950c00eda6c115e8e9785440da Mon Sep 17 00:00:00 2001 From: Stephen Boyd <swboyd@chromium.org> Date: Fri, 2 Nov 2018 13:57:32 -0700 Subject: [PATCH 323/443] i2c: qcom-geni: Fix runtime PM mismatch with child devices We need to enable runtime PM on this i2c controller before populating child devices with i2c_add_adapter(). Otherwise, if a child device uses runtime PM and stays runtime PM enabled we'll get the following warning at boot. Enabling runtime PM for inactive device (a98000.i2c) with active children [...] Call trace: pm_runtime_enable+0xd8/0xf8 geni_i2c_probe+0x440/0x460 platform_drv_probe+0x74/0xc8 [...] Let's move the runtime PM enabling and setup to before we add the adapter, so that this device can respond to runtime PM requests from children. Fixes: 37692de5d523 ("i2c: i2c-qcom-geni: Add bus driver for the Qualcomm GENI I2C controller") Signed-off-by: Stephen Boyd <swboyd@chromium.org> Reviewed-by: Douglas Anderson <dianders@chromium.org> Signed-off-by: Wolfram Sang <wsa@the-dreams.de> --- drivers/i2c/busses/i2c-qcom-geni.c | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/drivers/i2c/busses/i2c-qcom-geni.c b/drivers/i2c/busses/i2c-qcom-geni.c index 527f55c8c4c70..db075bc0d9525 100644 --- a/drivers/i2c/busses/i2c-qcom-geni.c +++ b/drivers/i2c/busses/i2c-qcom-geni.c @@ -571,18 +571,19 @@ static int geni_i2c_probe(struct platform_device *pdev) dev_dbg(&pdev->dev, "i2c fifo/se-dma mode. fifo depth:%d\n", tx_depth); - ret = i2c_add_adapter(&gi2c->adap); - if (ret) { - dev_err(&pdev->dev, "Error adding i2c adapter %d\n", ret); - return ret; - } - gi2c->suspended = 1; pm_runtime_set_suspended(gi2c->se.dev); pm_runtime_set_autosuspend_delay(gi2c->se.dev, I2C_AUTO_SUSPEND_DELAY); pm_runtime_use_autosuspend(gi2c->se.dev); pm_runtime_enable(gi2c->se.dev); + ret = i2c_add_adapter(&gi2c->adap); + if (ret) { + dev_err(&pdev->dev, "Error adding i2c adapter %d\n", ret); + pm_runtime_disable(gi2c->se.dev); + return ret; + } + return 0; } @@ -590,8 +591,8 @@ static int geni_i2c_remove(struct platform_device *pdev) { struct geni_i2c_dev *gi2c = platform_get_drvdata(pdev); - pm_runtime_disable(gi2c->se.dev); i2c_del_adapter(&gi2c->adap); + pm_runtime_disable(gi2c->se.dev); return 0; } From eb6984fa4ce2837dcb1f66720a600f31b0bb3739 Mon Sep 17 00:00:00 2001 From: Vasily Averin <vvs@virtuozzo.com> Date: Fri, 9 Nov 2018 11:34:40 -0500 Subject: [PATCH 324/443] ext4: missing !bh check in ext4_xattr_inode_write() According to Ted Ts'o ext4_getblk() called in ext4_xattr_inode_write() should not return bh = NULL The only time that bh could be NULL, then, would be in the case of something really going wrong; a programming error elsewhere (perhaps a wild pointer dereference) or I/O error causing on-disk file system corruption (although that would be highly unlikely given that we had *just* allocated the blocks and so the metadata blocks in question probably would still be in the cache). Fixes: e50e5129f384 ("ext4: xattr-in-inode support") Signed-off-by: Vasily Averin <vvs@virtuozzo.com> Signed-off-by: Theodore Ts'o <tytso@mit.edu> Cc: stable@kernel.org # 4.13 --- fs/ext4/xattr.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c index 0b96886835262..7643d52c776c6 100644 --- a/fs/ext4/xattr.c +++ b/fs/ext4/xattr.c @@ -1384,6 +1384,12 @@ static int ext4_xattr_inode_write(handle_t *handle, struct inode *ea_inode, bh = ext4_getblk(handle, ea_inode, block, 0); if (IS_ERR(bh)) return PTR_ERR(bh); + if (!bh) { + WARN_ON_ONCE(1); + EXT4_ERROR_INODE(ea_inode, + "ext4_getblk() return bh = NULL"); + return -EFSCORRUPTED; + } ret = ext4_journal_get_write_access(handle, bh); if (ret) goto out; From c71bcdcb42a7493348d3b45dee8139843bf45efc Mon Sep 17 00:00:00 2001 From: Ajay Gupta <ajayg@nvidia.com> Date: Fri, 26 Oct 2018 09:36:58 -0700 Subject: [PATCH 325/443] i2c: add i2c bus driver for NVIDIA GPU Latest NVIDIA GPU card has USB Type-C interface. There is a Type-C controller which can be accessed over I2C. This driver adds I2C bus driver to communicate with Type-C controller. I2C client driver will be part of USB Type-C UCSI driver. Signed-off-by: Ajay Gupta <ajayg@nvidia.com> Reviewed-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com> [wsa: kept Makefile sorting] Signed-off-by: Wolfram Sang <wsa@the-dreams.de> --- Documentation/i2c/busses/i2c-nvidia-gpu | 18 ++ MAINTAINERS | 7 + drivers/i2c/busses/Kconfig | 9 + drivers/i2c/busses/Makefile | 1 + drivers/i2c/busses/i2c-nvidia-gpu.c | 368 ++++++++++++++++++++++++ 5 files changed, 403 insertions(+) create mode 100644 Documentation/i2c/busses/i2c-nvidia-gpu create mode 100644 drivers/i2c/busses/i2c-nvidia-gpu.c diff --git a/Documentation/i2c/busses/i2c-nvidia-gpu b/Documentation/i2c/busses/i2c-nvidia-gpu new file mode 100644 index 0000000000000..31884d2b2eb5a --- /dev/null +++ b/Documentation/i2c/busses/i2c-nvidia-gpu @@ -0,0 +1,18 @@ +Kernel driver i2c-nvidia-gpu + +Datasheet: not publicly available. + +Authors: + Ajay Gupta <ajayg@nvidia.com> + +Description +----------- + +i2c-nvidia-gpu is a driver for I2C controller included in NVIDIA Turing +and later GPUs and it is used to communicate with Type-C controller on GPUs. + +If your 'lspci -v' listing shows something like the following, + +01:00.3 Serial bus controller [0c80]: NVIDIA Corporation Device 1ad9 (rev a1) + +then this driver should support the I2C controller of your GPU. diff --git a/MAINTAINERS b/MAINTAINERS index 259fe9445d2fb..1835d3ffd7ea3 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -6861,6 +6861,13 @@ L: linux-acpi@vger.kernel.org S: Maintained F: drivers/i2c/i2c-core-acpi.c +I2C CONTROLLER DRIVER FOR NVIDIA GPU +M: Ajay Gupta <ajayg@nvidia.com> +L: linux-i2c@vger.kernel.org +S: Maintained +F: Documentation/i2c/busses/i2c-nvidia-gpu +F: drivers/i2c/busses/i2c-nvidia-gpu.c + I2C MUXES M: Peter Rosin <peda@axentia.se> L: linux-i2c@vger.kernel.org diff --git a/drivers/i2c/busses/Kconfig b/drivers/i2c/busses/Kconfig index 77dc94b440117..f2c6819712013 100644 --- a/drivers/i2c/busses/Kconfig +++ b/drivers/i2c/busses/Kconfig @@ -224,6 +224,15 @@ config I2C_NFORCE2_S4985 This driver can also be built as a module. If so, the module will be called i2c-nforce2-s4985. +config I2C_NVIDIA_GPU + tristate "NVIDIA GPU I2C controller" + depends on PCI + help + If you say yes to this option, support will be included for the + NVIDIA GPU I2C controller which is used to communicate with the GPU's + Type-C controller. This driver can also be built as a module called + i2c-nvidia-gpu. + config I2C_SIS5595 tristate "SiS 5595" depends on PCI diff --git a/drivers/i2c/busses/Makefile b/drivers/i2c/busses/Makefile index 18b26af82b1c5..5f0cb6915969a 100644 --- a/drivers/i2c/busses/Makefile +++ b/drivers/i2c/busses/Makefile @@ -19,6 +19,7 @@ obj-$(CONFIG_I2C_ISCH) += i2c-isch.o obj-$(CONFIG_I2C_ISMT) += i2c-ismt.o obj-$(CONFIG_I2C_NFORCE2) += i2c-nforce2.o obj-$(CONFIG_I2C_NFORCE2_S4985) += i2c-nforce2-s4985.o +obj-$(CONFIG_I2C_NVIDIA_GPU) += i2c-nvidia-gpu.o obj-$(CONFIG_I2C_PIIX4) += i2c-piix4.o obj-$(CONFIG_I2C_SIS5595) += i2c-sis5595.o obj-$(CONFIG_I2C_SIS630) += i2c-sis630.o diff --git a/drivers/i2c/busses/i2c-nvidia-gpu.c b/drivers/i2c/busses/i2c-nvidia-gpu.c new file mode 100644 index 0000000000000..744f5e42636b2 --- /dev/null +++ b/drivers/i2c/busses/i2c-nvidia-gpu.c @@ -0,0 +1,368 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Nvidia GPU I2C controller Driver + * + * Copyright (C) 2018 NVIDIA Corporation. All rights reserved. + * Author: Ajay Gupta <ajayg@nvidia.com> + */ +#include <linux/delay.h> +#include <linux/i2c.h> +#include <linux/interrupt.h> +#include <linux/module.h> +#include <linux/pci.h> +#include <linux/platform_device.h> +#include <linux/pm.h> +#include <linux/pm_runtime.h> + +#include <asm/unaligned.h> + +/* I2C definitions */ +#define I2C_MST_CNTL 0x00 +#define I2C_MST_CNTL_GEN_START BIT(0) +#define I2C_MST_CNTL_GEN_STOP BIT(1) +#define I2C_MST_CNTL_CMD_READ (1 << 2) +#define I2C_MST_CNTL_CMD_WRITE (2 << 2) +#define I2C_MST_CNTL_BURST_SIZE_SHIFT 6 +#define I2C_MST_CNTL_GEN_NACK BIT(28) +#define I2C_MST_CNTL_STATUS GENMASK(30, 29) +#define I2C_MST_CNTL_STATUS_OKAY (0 << 29) +#define I2C_MST_CNTL_STATUS_NO_ACK (1 << 29) +#define I2C_MST_CNTL_STATUS_TIMEOUT (2 << 29) +#define I2C_MST_CNTL_STATUS_BUS_BUSY (3 << 29) +#define I2C_MST_CNTL_CYCLE_TRIGGER BIT(31) + +#define I2C_MST_ADDR 0x04 + +#define I2C_MST_I2C0_TIMING 0x08 +#define I2C_MST_I2C0_TIMING_SCL_PERIOD_100KHZ 0x10e +#define I2C_MST_I2C0_TIMING_TIMEOUT_CLK_CNT 16 +#define I2C_MST_I2C0_TIMING_TIMEOUT_CLK_CNT_MAX 255 +#define I2C_MST_I2C0_TIMING_TIMEOUT_CHECK BIT(24) + +#define I2C_MST_DATA 0x0c + +#define I2C_MST_HYBRID_PADCTL 0x20 +#define I2C_MST_HYBRID_PADCTL_MODE_I2C BIT(0) +#define I2C_MST_HYBRID_PADCTL_I2C_SCL_INPUT_RCV BIT(14) +#define I2C_MST_HYBRID_PADCTL_I2C_SDA_INPUT_RCV BIT(15) + +struct gpu_i2c_dev { + struct device *dev; + void __iomem *regs; + struct i2c_adapter adapter; + struct i2c_board_info *gpu_ccgx_ucsi; +}; + +static void gpu_enable_i2c_bus(struct gpu_i2c_dev *i2cd) +{ + u32 val; + + /* enable I2C */ + val = readl(i2cd->regs + I2C_MST_HYBRID_PADCTL); + val |= I2C_MST_HYBRID_PADCTL_MODE_I2C | + I2C_MST_HYBRID_PADCTL_I2C_SCL_INPUT_RCV | + I2C_MST_HYBRID_PADCTL_I2C_SDA_INPUT_RCV; + writel(val, i2cd->regs + I2C_MST_HYBRID_PADCTL); + + /* enable 100KHZ mode */ + val = I2C_MST_I2C0_TIMING_SCL_PERIOD_100KHZ; + val |= (I2C_MST_I2C0_TIMING_TIMEOUT_CLK_CNT_MAX + << I2C_MST_I2C0_TIMING_TIMEOUT_CLK_CNT); + val |= I2C_MST_I2C0_TIMING_TIMEOUT_CHECK; + writel(val, i2cd->regs + I2C_MST_I2C0_TIMING); +} + +static int gpu_i2c_check_status(struct gpu_i2c_dev *i2cd) +{ + unsigned long target = jiffies + msecs_to_jiffies(1000); + u32 val; + + do { + val = readl(i2cd->regs + I2C_MST_CNTL); + if (!(val & I2C_MST_CNTL_CYCLE_TRIGGER)) + break; + if ((val & I2C_MST_CNTL_STATUS) != + I2C_MST_CNTL_STATUS_BUS_BUSY) + break; + usleep_range(500, 600); + } while (time_is_after_jiffies(target)); + + if (time_is_before_jiffies(target)) { + dev_err(i2cd->dev, "i2c timeout error %x\n", val); + return -ETIME; + } + + val = readl(i2cd->regs + I2C_MST_CNTL); + switch (val & I2C_MST_CNTL_STATUS) { + case I2C_MST_CNTL_STATUS_OKAY: + return 0; + case I2C_MST_CNTL_STATUS_NO_ACK: + return -EIO; + case I2C_MST_CNTL_STATUS_TIMEOUT: + return -ETIME; + default: + return 0; + } +} + +static int gpu_i2c_read(struct gpu_i2c_dev *i2cd, u8 *data, u16 len) +{ + int status; + u32 val; + + val = I2C_MST_CNTL_GEN_START | I2C_MST_CNTL_CMD_READ | + (len << I2C_MST_CNTL_BURST_SIZE_SHIFT) | + I2C_MST_CNTL_CYCLE_TRIGGER | I2C_MST_CNTL_GEN_NACK; + writel(val, i2cd->regs + I2C_MST_CNTL); + + status = gpu_i2c_check_status(i2cd); + if (status < 0) + return status; + + val = readl(i2cd->regs + I2C_MST_DATA); + switch (len) { + case 1: + data[0] = val; + break; + case 2: + put_unaligned_be16(val, data); + break; + case 3: + put_unaligned_be16(val >> 8, data); + data[2] = val; + break; + case 4: + put_unaligned_be32(val, data); + break; + default: + break; + } + return status; +} + +static int gpu_i2c_start(struct gpu_i2c_dev *i2cd) +{ + writel(I2C_MST_CNTL_GEN_START, i2cd->regs + I2C_MST_CNTL); + return gpu_i2c_check_status(i2cd); +} + +static int gpu_i2c_stop(struct gpu_i2c_dev *i2cd) +{ + writel(I2C_MST_CNTL_GEN_STOP, i2cd->regs + I2C_MST_CNTL); + return gpu_i2c_check_status(i2cd); +} + +static int gpu_i2c_write(struct gpu_i2c_dev *i2cd, u8 data) +{ + u32 val; + + writel(data, i2cd->regs + I2C_MST_DATA); + + val = I2C_MST_CNTL_CMD_WRITE | (1 << I2C_MST_CNTL_BURST_SIZE_SHIFT); + writel(val, i2cd->regs + I2C_MST_CNTL); + + return gpu_i2c_check_status(i2cd); +} + +static int gpu_i2c_master_xfer(struct i2c_adapter *adap, + struct i2c_msg *msgs, int num) +{ + struct gpu_i2c_dev *i2cd = i2c_get_adapdata(adap); + int status, status2; + int i, j; + + /* + * The controller supports maximum 4 byte read due to known + * limitation of sending STOP after every read. + */ + for (i = 0; i < num; i++) { + if (msgs[i].flags & I2C_M_RD) { + /* program client address before starting read */ + writel(msgs[i].addr, i2cd->regs + I2C_MST_ADDR); + /* gpu_i2c_read has implicit start */ + status = gpu_i2c_read(i2cd, msgs[i].buf, msgs[i].len); + if (status < 0) + goto stop; + } else { + u8 addr = i2c_8bit_addr_from_msg(msgs + i); + + status = gpu_i2c_start(i2cd); + if (status < 0) { + if (i == 0) + return status; + goto stop; + } + + status = gpu_i2c_write(i2cd, addr); + if (status < 0) + goto stop; + + for (j = 0; j < msgs[i].len; j++) { + status = gpu_i2c_write(i2cd, msgs[i].buf[j]); + if (status < 0) + goto stop; + } + } + } + status = gpu_i2c_stop(i2cd); + if (status < 0) + return status; + + return i; +stop: + status2 = gpu_i2c_stop(i2cd); + if (status2 < 0) + dev_err(i2cd->dev, "i2c stop failed %d\n", status2); + return status; +} + +static const struct i2c_adapter_quirks gpu_i2c_quirks = { + .max_read_len = 4, + .flags = I2C_AQ_COMB_WRITE_THEN_READ, +}; + +static u32 gpu_i2c_functionality(struct i2c_adapter *adap) +{ + return I2C_FUNC_I2C | I2C_FUNC_SMBUS_EMUL; +} + +static const struct i2c_algorithm gpu_i2c_algorithm = { + .master_xfer = gpu_i2c_master_xfer, + .functionality = gpu_i2c_functionality, +}; + +/* + * This driver is for Nvidia GPU cards with USB Type-C interface. + * We want to identify the cards using vendor ID and class code only + * to avoid dependency of adding product id for any new card which + * requires this driver. + * Currently there is no class code defined for UCSI device over PCI + * so using UNKNOWN class for now and it will be updated when UCSI + * over PCI gets a class code. + * There is no other NVIDIA cards with UNKNOWN class code. Even if the + * driver gets loaded for an undesired card then eventually i2c_read() + * (initiated from UCSI i2c_client) will timeout or UCSI commands will + * timeout. + */ +#define PCI_CLASS_SERIAL_UNKNOWN 0x0c80 +static const struct pci_device_id gpu_i2c_ids[] = { + { PCI_VENDOR_ID_NVIDIA, PCI_ANY_ID, PCI_ANY_ID, PCI_ANY_ID, + PCI_CLASS_SERIAL_UNKNOWN << 8, 0xffffff00}, + { } +}; +MODULE_DEVICE_TABLE(pci, gpu_i2c_ids); + +static int gpu_populate_client(struct gpu_i2c_dev *i2cd, int irq) +{ + struct i2c_client *ccgx_client; + + i2cd->gpu_ccgx_ucsi = devm_kzalloc(i2cd->dev, + sizeof(*i2cd->gpu_ccgx_ucsi), + GFP_KERNEL); + if (!i2cd->gpu_ccgx_ucsi) + return -ENOMEM; + + strlcpy(i2cd->gpu_ccgx_ucsi->type, "ccgx-ucsi", + sizeof(i2cd->gpu_ccgx_ucsi->type)); + i2cd->gpu_ccgx_ucsi->addr = 0x8; + i2cd->gpu_ccgx_ucsi->irq = irq; + ccgx_client = i2c_new_device(&i2cd->adapter, i2cd->gpu_ccgx_ucsi); + if (!ccgx_client) + return -ENODEV; + + return 0; +} + +static int gpu_i2c_probe(struct pci_dev *pdev, const struct pci_device_id *id) +{ + struct gpu_i2c_dev *i2cd; + int status; + + i2cd = devm_kzalloc(&pdev->dev, sizeof(*i2cd), GFP_KERNEL); + if (!i2cd) + return -ENOMEM; + + i2cd->dev = &pdev->dev; + dev_set_drvdata(&pdev->dev, i2cd); + + status = pcim_enable_device(pdev); + if (status < 0) { + dev_err(&pdev->dev, "pcim_enable_device failed %d\n", status); + return status; + } + + pci_set_master(pdev); + + i2cd->regs = pcim_iomap(pdev, 0, 0); + if (!i2cd->regs) { + dev_err(&pdev->dev, "pcim_iomap failed\n"); + return -ENOMEM; + } + + status = pci_alloc_irq_vectors(pdev, 1, 1, PCI_IRQ_MSI); + if (status < 0) { + dev_err(&pdev->dev, "pci_alloc_irq_vectors err %d\n", status); + return status; + } + + gpu_enable_i2c_bus(i2cd); + + i2c_set_adapdata(&i2cd->adapter, i2cd); + i2cd->adapter.owner = THIS_MODULE; + strlcpy(i2cd->adapter.name, "NVIDIA GPU I2C adapter", + sizeof(i2cd->adapter.name)); + i2cd->adapter.algo = &gpu_i2c_algorithm; + i2cd->adapter.quirks = &gpu_i2c_quirks; + i2cd->adapter.dev.parent = &pdev->dev; + status = i2c_add_adapter(&i2cd->adapter); + if (status < 0) + goto free_irq_vectors; + + status = gpu_populate_client(i2cd, pdev->irq); + if (status < 0) { + dev_err(&pdev->dev, "gpu_populate_client failed %d\n", status); + goto del_adapter; + } + + return 0; + +del_adapter: + i2c_del_adapter(&i2cd->adapter); +free_irq_vectors: + pci_free_irq_vectors(pdev); + return status; +} + +static void gpu_i2c_remove(struct pci_dev *pdev) +{ + struct gpu_i2c_dev *i2cd = dev_get_drvdata(&pdev->dev); + + i2c_del_adapter(&i2cd->adapter); + pci_free_irq_vectors(pdev); +} + +static int gpu_i2c_resume(struct device *dev) +{ + struct gpu_i2c_dev *i2cd = dev_get_drvdata(dev); + + gpu_enable_i2c_bus(i2cd); + return 0; +} + +UNIVERSAL_DEV_PM_OPS(gpu_i2c_driver_pm, NULL, gpu_i2c_resume, NULL); + +static struct pci_driver gpu_i2c_driver = { + .name = "nvidia-gpu", + .id_table = gpu_i2c_ids, + .probe = gpu_i2c_probe, + .remove = gpu_i2c_remove, + .driver = { + .pm = &gpu_i2c_driver_pm, + }, +}; + +module_pci_driver(gpu_i2c_driver); + +MODULE_AUTHOR("Ajay Gupta <ajayg@nvidia.com>"); +MODULE_DESCRIPTION("Nvidia GPU I2C controller Driver"); +MODULE_LICENSE("GPL v2"); From caccdcc5dbec0dd9643fe1667893e78631a4d38e Mon Sep 17 00:00:00 2001 From: Wolfram Sang <wsa@the-dreams.de> Date: Fri, 9 Nov 2018 17:54:38 +0100 Subject: [PATCH 326/443] i2c: nvidia-gpu: make pm_ops static sparse rightfully says: warning: symbol 'gpu_i2c_driver_pm' was not declared. Should it be static? Signed-off-by: Wolfram Sang <wsa@the-dreams.de> --- drivers/i2c/busses/i2c-nvidia-gpu.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/i2c/busses/i2c-nvidia-gpu.c b/drivers/i2c/busses/i2c-nvidia-gpu.c index 744f5e42636b2..8822357bca0c3 100644 --- a/drivers/i2c/busses/i2c-nvidia-gpu.c +++ b/drivers/i2c/busses/i2c-nvidia-gpu.c @@ -349,7 +349,7 @@ static int gpu_i2c_resume(struct device *dev) return 0; } -UNIVERSAL_DEV_PM_OPS(gpu_i2c_driver_pm, NULL, gpu_i2c_resume, NULL); +static UNIVERSAL_DEV_PM_OPS(gpu_i2c_driver_pm, NULL, gpu_i2c_resume, NULL); static struct pci_driver gpu_i2c_driver = { .name = "nvidia-gpu", From 23d8003907d094f77cf959228e2248d6db819fa7 Mon Sep 17 00:00:00 2001 From: Stanislav Lisovskiy <stanislav.lisovskiy@intel.com> Date: Fri, 9 Nov 2018 11:00:12 +0200 Subject: [PATCH 327/443] drm/dp_mst: Check if primary mstb is null Unfortunately drm_dp_get_mst_branch_device which is called from both drm_dp_mst_handle_down_rep and drm_dp_mst_handle_up_rep seem to rely on that mgr->mst_primary is not NULL, which seem to be wrong as it can be cleared with simultaneous mode set, if probing fails or in other case. mgr->lock mutex doesn't protect against that as it might just get assigned to NULL right before, not simultaneously. There are currently bugs 107738, 108616 bugs which crash in drm_dp_get_mst_branch_device, caused by this issue. v2: Refactored the code, as it was nicely noticed. Fixed Bugzilla bug numbers(second was 108616, but not 108816) and added links. [changed title and added stable cc] Signed-off-by: Lyude Paul <lyude@redhat.com> Signed-off-by: Stanislav Lisovskiy <stanislav.lisovskiy@intel.com> Cc: stable@vger.kernel.org Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=108616 Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=107738 Link: https://patchwork.freedesktop.org/patch/msgid/20181109090012.24438-1-stanislav.lisovskiy@intel.com --- drivers/gpu/drm/drm_dp_mst_topology.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/gpu/drm/drm_dp_mst_topology.c b/drivers/gpu/drm/drm_dp_mst_topology.c index 5ff1d79b86c4a..0e0df398222d1 100644 --- a/drivers/gpu/drm/drm_dp_mst_topology.c +++ b/drivers/gpu/drm/drm_dp_mst_topology.c @@ -1275,6 +1275,9 @@ static struct drm_dp_mst_branch *drm_dp_get_mst_branch_device(struct drm_dp_mst_ mutex_lock(&mgr->lock); mstb = mgr->mst_primary; + if (!mstb) + goto out; + for (i = 0; i < lct - 1; i++) { int shift = (i % 2) ? 0 : 4; int port_num = (rad[i / 2] >> shift) & 0xf; From 641a41dbba217ee5bd26abe6be77f8cead9cd00e Mon Sep 17 00:00:00 2001 From: Yoshihiro Shimoda <yoshihiro.shimoda.uh@renesas.com> Date: Tue, 30 Oct 2018 15:13:35 +0900 Subject: [PATCH 328/443] serial: sh-sci: Fix could not remove dev_attr_rx_fifo_timeout This patch fixes an issue that the sci_remove() could not remove dev_attr_rx_fifo_timeout because uart_remove_one_port() set the port->port.type to PORT_UNKNOWN. Reported-by: Hiromitsu Yamasaki <hiromitsu.yamasaki.ym@renesas.com> Fixes: 5d23188a473d ("serial: sh-sci: make RX FIFO parameters tunable via sysfs") Cc: <stable@vger.kernel.org> # v4.11+ Signed-off-by: Yoshihiro Shimoda <yoshihiro.shimoda.uh@renesas.com> Reviewed-by: Ulrich Hecht <uli+renesas@fpond.eu> Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> --- drivers/tty/serial/sh-sci.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/tty/serial/sh-sci.c b/drivers/tty/serial/sh-sci.c index e19bfbba8a019..effba6ce0caa2 100644 --- a/drivers/tty/serial/sh-sci.c +++ b/drivers/tty/serial/sh-sci.c @@ -3102,6 +3102,7 @@ static struct uart_driver sci_uart_driver = { static int sci_remove(struct platform_device *dev) { struct sci_port *port = platform_get_drvdata(dev); + unsigned int type = port->port.type; /* uart_remove_... clears it */ sci_ports_in_use &= ~BIT(port->port.line); uart_remove_one_port(&sci_uart_driver, &port->port); @@ -3112,8 +3113,7 @@ static int sci_remove(struct platform_device *dev) sysfs_remove_file(&dev->dev.kobj, &dev_attr_rx_fifo_trigger.attr); } - if (port->port.type == PORT_SCIFA || port->port.type == PORT_SCIFB || - port->port.type == PORT_HSCIF) { + if (type == PORT_SCIFA || type == PORT_SCIFB || type == PORT_HSCIF) { sysfs_remove_file(&dev->dev.kobj, &dev_attr_rx_fifo_timeout.attr); } From 247c554a14aa16ca08f4ed4d9eb39a2389f69d1d Mon Sep 17 00:00:00 2001 From: Ajay Gupta <ajayg@nvidia.com> Date: Fri, 26 Oct 2018 09:36:59 -0700 Subject: [PATCH 329/443] usb: typec: ucsi: add support for Cypress CCGx Latest NVIDIA GPU cards have a Cypress CCGx Type-C controller over I2C interface. This UCSI I2C driver uses I2C bus driver interface for communicating with Type-C controller. Signed-off-by: Ajay Gupta <ajayg@nvidia.com> Acked-by: Heikki Krogerus <heikki.krogerus@linux.intel.com> Signed-off-by: Wolfram Sang <wsa@the-dreams.de> --- drivers/usb/typec/ucsi/Kconfig | 10 + drivers/usb/typec/ucsi/Makefile | 2 + drivers/usb/typec/ucsi/ucsi_ccg.c | 307 ++++++++++++++++++++++++++++++ 3 files changed, 319 insertions(+) create mode 100644 drivers/usb/typec/ucsi/ucsi_ccg.c diff --git a/drivers/usb/typec/ucsi/Kconfig b/drivers/usb/typec/ucsi/Kconfig index e36d6c73c4a41..78118883f96c8 100644 --- a/drivers/usb/typec/ucsi/Kconfig +++ b/drivers/usb/typec/ucsi/Kconfig @@ -23,6 +23,16 @@ config TYPEC_UCSI if TYPEC_UCSI +config UCSI_CCG + tristate "UCSI Interface Driver for Cypress CCGx" + depends on I2C + help + This driver enables UCSI support on platforms that expose a + Cypress CCGx Type-C controller over I2C interface. + + To compile the driver as a module, choose M here: the module will be + called ucsi_ccg. + config UCSI_ACPI tristate "UCSI ACPI Interface Driver" depends on ACPI diff --git a/drivers/usb/typec/ucsi/Makefile b/drivers/usb/typec/ucsi/Makefile index 7afbea5122077..2f4900b26210e 100644 --- a/drivers/usb/typec/ucsi/Makefile +++ b/drivers/usb/typec/ucsi/Makefile @@ -8,3 +8,5 @@ typec_ucsi-y := ucsi.o typec_ucsi-$(CONFIG_TRACING) += trace.o obj-$(CONFIG_UCSI_ACPI) += ucsi_acpi.o + +obj-$(CONFIG_UCSI_CCG) += ucsi_ccg.o diff --git a/drivers/usb/typec/ucsi/ucsi_ccg.c b/drivers/usb/typec/ucsi/ucsi_ccg.c new file mode 100644 index 0000000000000..de8a43bdff689 --- /dev/null +++ b/drivers/usb/typec/ucsi/ucsi_ccg.c @@ -0,0 +1,307 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * UCSI driver for Cypress CCGx Type-C controller + * + * Copyright (C) 2017-2018 NVIDIA Corporation. All rights reserved. + * Author: Ajay Gupta <ajayg@nvidia.com> + * + * Some code borrowed from drivers/usb/typec/ucsi/ucsi_acpi.c + */ +#include <linux/acpi.h> +#include <linux/delay.h> +#include <linux/i2c.h> +#include <linux/module.h> +#include <linux/pci.h> +#include <linux/platform_device.h> + +#include <asm/unaligned.h> +#include "ucsi.h" + +struct ucsi_ccg { + struct device *dev; + struct ucsi *ucsi; + struct ucsi_ppm ppm; + struct i2c_client *client; +}; + +#define CCGX_RAB_INTR_REG 0x06 +#define CCGX_RAB_UCSI_CONTROL 0x39 +#define CCGX_RAB_UCSI_CONTROL_START BIT(0) +#define CCGX_RAB_UCSI_CONTROL_STOP BIT(1) +#define CCGX_RAB_UCSI_DATA_BLOCK(offset) (0xf000 | ((offset) & 0xff)) + +static int ccg_read(struct ucsi_ccg *uc, u16 rab, u8 *data, u32 len) +{ + struct i2c_client *client = uc->client; + const struct i2c_adapter_quirks *quirks = client->adapter->quirks; + unsigned char buf[2]; + struct i2c_msg msgs[] = { + { + .addr = client->addr, + .flags = 0x0, + .len = sizeof(buf), + .buf = buf, + }, + { + .addr = client->addr, + .flags = I2C_M_RD, + .buf = data, + }, + }; + u32 rlen, rem_len = len, max_read_len = len; + int status; + + /* check any max_read_len limitation on i2c adapter */ + if (quirks && quirks->max_read_len) + max_read_len = quirks->max_read_len; + + while (rem_len > 0) { + msgs[1].buf = &data[len - rem_len]; + rlen = min_t(u16, rem_len, max_read_len); + msgs[1].len = rlen; + put_unaligned_le16(rab, buf); + status = i2c_transfer(client->adapter, msgs, ARRAY_SIZE(msgs)); + if (status < 0) { + dev_err(uc->dev, "i2c_transfer failed %d\n", status); + return status; + } + rab += rlen; + rem_len -= rlen; + } + + return 0; +} + +static int ccg_write(struct ucsi_ccg *uc, u16 rab, u8 *data, u32 len) +{ + struct i2c_client *client = uc->client; + unsigned char *buf; + struct i2c_msg msgs[] = { + { + .addr = client->addr, + .flags = 0x0, + } + }; + int status; + + buf = kzalloc(len + sizeof(rab), GFP_KERNEL); + if (!buf) + return -ENOMEM; + + put_unaligned_le16(rab, buf); + memcpy(buf + sizeof(rab), data, len); + + msgs[0].len = len + sizeof(rab); + msgs[0].buf = buf; + + status = i2c_transfer(client->adapter, msgs, ARRAY_SIZE(msgs)); + if (status < 0) { + dev_err(uc->dev, "i2c_transfer failed %d\n", status); + kfree(buf); + return status; + } + + kfree(buf); + return 0; +} + +static int ucsi_ccg_init(struct ucsi_ccg *uc) +{ + unsigned int count = 10; + u8 data; + int status; + + data = CCGX_RAB_UCSI_CONTROL_STOP; + status = ccg_write(uc, CCGX_RAB_UCSI_CONTROL, &data, sizeof(data)); + if (status < 0) + return status; + + data = CCGX_RAB_UCSI_CONTROL_START; + status = ccg_write(uc, CCGX_RAB_UCSI_CONTROL, &data, sizeof(data)); + if (status < 0) + return status; + + /* + * Flush CCGx RESPONSE queue by acking interrupts. Above ucsi control + * register write will push response which must be cleared. + */ + do { + status = ccg_read(uc, CCGX_RAB_INTR_REG, &data, sizeof(data)); + if (status < 0) + return status; + + if (!data) + return 0; + + status = ccg_write(uc, CCGX_RAB_INTR_REG, &data, sizeof(data)); + if (status < 0) + return status; + + usleep_range(10000, 11000); + } while (--count); + + return -ETIMEDOUT; +} + +static int ucsi_ccg_send_data(struct ucsi_ccg *uc) +{ + u8 *ppm = (u8 *)uc->ppm.data; + int status; + u16 rab; + + rab = CCGX_RAB_UCSI_DATA_BLOCK(offsetof(struct ucsi_data, message_out)); + status = ccg_write(uc, rab, ppm + + offsetof(struct ucsi_data, message_out), + sizeof(uc->ppm.data->message_out)); + if (status < 0) + return status; + + rab = CCGX_RAB_UCSI_DATA_BLOCK(offsetof(struct ucsi_data, ctrl)); + return ccg_write(uc, rab, ppm + offsetof(struct ucsi_data, ctrl), + sizeof(uc->ppm.data->ctrl)); +} + +static int ucsi_ccg_recv_data(struct ucsi_ccg *uc) +{ + u8 *ppm = (u8 *)uc->ppm.data; + int status; + u16 rab; + + rab = CCGX_RAB_UCSI_DATA_BLOCK(offsetof(struct ucsi_data, cci)); + status = ccg_read(uc, rab, ppm + offsetof(struct ucsi_data, cci), + sizeof(uc->ppm.data->cci)); + if (status < 0) + return status; + + rab = CCGX_RAB_UCSI_DATA_BLOCK(offsetof(struct ucsi_data, message_in)); + return ccg_read(uc, rab, ppm + offsetof(struct ucsi_data, message_in), + sizeof(uc->ppm.data->message_in)); +} + +static int ucsi_ccg_ack_interrupt(struct ucsi_ccg *uc) +{ + int status; + unsigned char data; + + status = ccg_read(uc, CCGX_RAB_INTR_REG, &data, sizeof(data)); + if (status < 0) + return status; + + return ccg_write(uc, CCGX_RAB_INTR_REG, &data, sizeof(data)); +} + +static int ucsi_ccg_sync(struct ucsi_ppm *ppm) +{ + struct ucsi_ccg *uc = container_of(ppm, struct ucsi_ccg, ppm); + int status; + + status = ucsi_ccg_recv_data(uc); + if (status < 0) + return status; + + /* ack interrupt to allow next command to run */ + return ucsi_ccg_ack_interrupt(uc); +} + +static int ucsi_ccg_cmd(struct ucsi_ppm *ppm, struct ucsi_control *ctrl) +{ + struct ucsi_ccg *uc = container_of(ppm, struct ucsi_ccg, ppm); + + ppm->data->ctrl.raw_cmd = ctrl->raw_cmd; + return ucsi_ccg_send_data(uc); +} + +static irqreturn_t ccg_irq_handler(int irq, void *data) +{ + struct ucsi_ccg *uc = data; + + ucsi_notify(uc->ucsi); + + return IRQ_HANDLED; +} + +static int ucsi_ccg_probe(struct i2c_client *client, + const struct i2c_device_id *id) +{ + struct device *dev = &client->dev; + struct ucsi_ccg *uc; + int status; + u16 rab; + + uc = devm_kzalloc(dev, sizeof(*uc), GFP_KERNEL); + if (!uc) + return -ENOMEM; + + uc->ppm.data = devm_kzalloc(dev, sizeof(struct ucsi_data), GFP_KERNEL); + if (!uc->ppm.data) + return -ENOMEM; + + uc->ppm.cmd = ucsi_ccg_cmd; + uc->ppm.sync = ucsi_ccg_sync; + uc->dev = dev; + uc->client = client; + + /* reset ccg device and initialize ucsi */ + status = ucsi_ccg_init(uc); + if (status < 0) { + dev_err(uc->dev, "ucsi_ccg_init failed - %d\n", status); + return status; + } + + status = devm_request_threaded_irq(dev, client->irq, NULL, + ccg_irq_handler, + IRQF_ONESHOT | IRQF_TRIGGER_HIGH, + dev_name(dev), uc); + if (status < 0) { + dev_err(uc->dev, "request_threaded_irq failed - %d\n", status); + return status; + } + + uc->ucsi = ucsi_register_ppm(dev, &uc->ppm); + if (IS_ERR(uc->ucsi)) { + dev_err(uc->dev, "ucsi_register_ppm failed\n"); + return PTR_ERR(uc->ucsi); + } + + rab = CCGX_RAB_UCSI_DATA_BLOCK(offsetof(struct ucsi_data, version)); + status = ccg_read(uc, rab, (u8 *)(uc->ppm.data) + + offsetof(struct ucsi_data, version), + sizeof(uc->ppm.data->version)); + if (status < 0) { + ucsi_unregister_ppm(uc->ucsi); + return status; + } + + i2c_set_clientdata(client, uc); + return 0; +} + +static int ucsi_ccg_remove(struct i2c_client *client) +{ + struct ucsi_ccg *uc = i2c_get_clientdata(client); + + ucsi_unregister_ppm(uc->ucsi); + + return 0; +} + +static const struct i2c_device_id ucsi_ccg_device_id[] = { + {"ccgx-ucsi", 0}, + {} +}; +MODULE_DEVICE_TABLE(i2c, ucsi_ccg_device_id); + +static struct i2c_driver ucsi_ccg_driver = { + .driver = { + .name = "ucsi_ccg", + }, + .probe = ucsi_ccg_probe, + .remove = ucsi_ccg_remove, + .id_table = ucsi_ccg_device_id, +}; + +module_i2c_driver(ucsi_ccg_driver); + +MODULE_AUTHOR("Ajay Gupta <ajayg@nvidia.com>"); +MODULE_DESCRIPTION("UCSI driver for Cypress CCGx Type-C controller"); +MODULE_LICENSE("GPL v2"); From 15035388439f892017d38b05214d3cda6578af64 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (VMware)" <rostedt@goodmis.org> Date: Fri, 9 Nov 2018 15:22:07 -0500 Subject: [PATCH 330/443] x86/cpu/vmware: Do not trace vmware_sched_clock() When running function tracing on a Linux guest running on VMware Workstation, the guest would crash. This is due to tracing of the sched_clock internal call of the VMware vmware_sched_clock(), which causes an infinite recursion within the tracing code (clock calls must not be traced). Make vmware_sched_clock() not traced by ftrace. Fixes: 80e9a4f21fd7c ("x86/vmware: Add paravirt sched clock") Reported-by: GwanYeong Kim <gy741.kim@gmail.com> Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org> Signed-off-by: Borislav Petkov <bp@suse.de> CC: Alok Kataria <akataria@vmware.com> CC: GwanYeong Kim <gy741.kim@gmail.com> CC: "H. Peter Anvin" <hpa@zytor.com> CC: Ingo Molnar <mingo@kernel.org> Cc: stable@vger.kernel.org CC: Thomas Gleixner <tglx@linutronix.de> CC: virtualization@lists.linux-foundation.org CC: x86-ml <x86@kernel.org> Link: http://lkml.kernel.org/r/20181109152207.4d3e7d70@gandalf.local.home --- arch/x86/kernel/cpu/vmware.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/kernel/cpu/vmware.c b/arch/x86/kernel/cpu/vmware.c index d9ab49bed8afc..0eda91f8eeace 100644 --- a/arch/x86/kernel/cpu/vmware.c +++ b/arch/x86/kernel/cpu/vmware.c @@ -77,7 +77,7 @@ static __init int setup_vmw_sched_clock(char *s) } early_param("no-vmw-sched-clock", setup_vmw_sched_clock); -static unsigned long long vmware_sched_clock(void) +static unsigned long long notrace vmware_sched_clock(void) { unsigned long long ns; From 1aefa98b010e9cc7a07046cbcb1237ddad85b708 Mon Sep 17 00:00:00 2001 From: Vinod Koul <vkoul@kernel.org> Date: Fri, 9 Nov 2018 15:20:54 +0530 Subject: [PATCH 331/443] clk: qcom: gcc: Fix board clock node name Device tree node name are not supposed to have "_" in them so fix the node name use of xo_board to xo-board Fixes: 652f1813c113 ("clk: qcom: gcc: Add global clock controller driver for QCS404") Signed-off-by: Vinod Koul <vkoul@kernel.org> Signed-off-by: Stephen Boyd <sboyd@kernel.org> --- drivers/clk/qcom/gcc-qcs404.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/clk/qcom/gcc-qcs404.c b/drivers/clk/qcom/gcc-qcs404.c index e4ca6a45f3139..ef1b267cb058a 100644 --- a/drivers/clk/qcom/gcc-qcs404.c +++ b/drivers/clk/qcom/gcc-qcs404.c @@ -265,7 +265,7 @@ static struct clk_fixed_factor cxo = { .div = 1, .hw.init = &(struct clk_init_data){ .name = "cxo", - .parent_names = (const char *[]){ "xo_board" }, + .parent_names = (const char *[]){ "xo-board" }, .num_parents = 1, .ops = &clk_fixed_factor_ops, }, From 35e8e8b45d31bec34379dd36e7b71448e003efb2 Mon Sep 17 00:00:00 2001 From: Igor Russkikh <Igor.Russkikh@aquantia.com> Date: Fri, 9 Nov 2018 11:53:56 +0000 Subject: [PATCH 332/443] net: aquantia: synchronized flow control between mac/phy Flow control statuses were not synchronized between blocks, that caused packets/link drop on some corner cases, when MAC sent PFC although Phy was not expecting these to come. Driver should readout the negotiated FC from phy and configure RX block accordigly. This is done on each link change event with information from FW. Fixes: 288551de45aa ("net: aquantia: Implement rx/tx flow control ethtools callback") Signed-off-by: Igor Russkikh <igor.russkikh@aquantia.com> Signed-off-by: David S. Miller <davem@davemloft.net> --- .../ethernet/aquantia/atlantic/aq_ethtool.c | 8 +++---- .../net/ethernet/aquantia/atlantic/aq_hw.h | 3 +++ .../net/ethernet/aquantia/atlantic/aq_nic.c | 14 ++++++++++++- .../aquantia/atlantic/hw_atl/hw_atl_b0.c | 12 ++++++++--- .../atlantic/hw_atl/hw_atl_utils_fw2x.c | 21 +++++++++++++++++++ 5 files changed, 50 insertions(+), 8 deletions(-) diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_ethtool.c b/drivers/net/ethernet/aquantia/atlantic/aq_ethtool.c index 6a633c70f603d..99ef1daaa4d80 100644 --- a/drivers/net/ethernet/aquantia/atlantic/aq_ethtool.c +++ b/drivers/net/ethernet/aquantia/atlantic/aq_ethtool.c @@ -407,13 +407,13 @@ static void aq_ethtool_get_pauseparam(struct net_device *ndev, struct ethtool_pauseparam *pause) { struct aq_nic_s *aq_nic = netdev_priv(ndev); + u32 fc = aq_nic->aq_nic_cfg.flow_control; pause->autoneg = 0; - if (aq_nic->aq_hw->aq_nic_cfg->flow_control & AQ_NIC_FC_RX) - pause->rx_pause = 1; - if (aq_nic->aq_hw->aq_nic_cfg->flow_control & AQ_NIC_FC_TX) - pause->tx_pause = 1; + pause->rx_pause = !!(fc & AQ_NIC_FC_RX); + pause->tx_pause = !!(fc & AQ_NIC_FC_TX); + } static int aq_ethtool_set_pauseparam(struct net_device *ndev, diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_hw.h b/drivers/net/ethernet/aquantia/atlantic/aq_hw.h index e8689241204e9..7ec8d24b2b0b8 100644 --- a/drivers/net/ethernet/aquantia/atlantic/aq_hw.h +++ b/drivers/net/ethernet/aquantia/atlantic/aq_hw.h @@ -204,6 +204,7 @@ struct aq_hw_ops { int (*hw_get_fw_version)(struct aq_hw_s *self, u32 *fw_version); + int (*hw_set_fc)(struct aq_hw_s *self, u32 fc, u32 tc); }; struct aq_fw_ops { @@ -226,6 +227,8 @@ struct aq_fw_ops { int (*update_stats)(struct aq_hw_s *self); + u32 (*get_flow_control)(struct aq_hw_s *self, u32 *fcmode); + int (*set_flow_control)(struct aq_hw_s *self); int (*set_power)(struct aq_hw_s *self, unsigned int power_state, diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_nic.c b/drivers/net/ethernet/aquantia/atlantic/aq_nic.c index 5fed244466871..0011a3f2f6727 100644 --- a/drivers/net/ethernet/aquantia/atlantic/aq_nic.c +++ b/drivers/net/ethernet/aquantia/atlantic/aq_nic.c @@ -124,6 +124,7 @@ void aq_nic_cfg_start(struct aq_nic_s *self) static int aq_nic_update_link_status(struct aq_nic_s *self) { int err = self->aq_fw_ops->update_link_status(self->aq_hw); + u32 fc = 0; if (err) return err; @@ -133,6 +134,15 @@ static int aq_nic_update_link_status(struct aq_nic_s *self) AQ_CFG_DRV_NAME, self->link_status.mbps, self->aq_hw->aq_link_status.mbps); aq_nic_update_interrupt_moderation_settings(self); + + /* Driver has to update flow control settings on RX block + * on any link event. + * We should query FW whether it negotiated FC. + */ + if (self->aq_fw_ops->get_flow_control) + self->aq_fw_ops->get_flow_control(self->aq_hw, &fc); + if (self->aq_hw_ops->hw_set_fc) + self->aq_hw_ops->hw_set_fc(self->aq_hw, fc, 0); } self->link_status = self->aq_hw->aq_link_status; @@ -772,7 +782,9 @@ void aq_nic_get_link_ksettings(struct aq_nic_s *self, ethtool_link_ksettings_add_link_mode(cmd, advertising, Pause); - if (self->aq_nic_cfg.flow_control & AQ_NIC_FC_TX) + /* Asym is when either RX or TX, but not both */ + if (!!(self->aq_nic_cfg.flow_control & AQ_NIC_FC_TX) ^ + !!(self->aq_nic_cfg.flow_control & AQ_NIC_FC_RX)) ethtool_link_ksettings_add_link_mode(cmd, advertising, Asym_Pause); diff --git a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0.c b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0.c index 76d25d594a0f6..119265762b0c4 100644 --- a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0.c +++ b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0.c @@ -100,12 +100,17 @@ static int hw_atl_b0_hw_reset(struct aq_hw_s *self) return err; } +static int hw_atl_b0_set_fc(struct aq_hw_s *self, u32 fc, u32 tc) +{ + hw_atl_rpb_rx_xoff_en_per_tc_set(self, !!(fc & AQ_NIC_FC_RX), tc); + return 0; +} + static int hw_atl_b0_hw_qos_set(struct aq_hw_s *self) { u32 tc = 0U; u32 buff_size = 0U; unsigned int i_priority = 0U; - bool is_rx_flow_control = false; /* TPS Descriptor rate init */ hw_atl_tps_tx_pkt_shed_desc_rate_curr_time_res_set(self, 0x0U); @@ -138,7 +143,6 @@ static int hw_atl_b0_hw_qos_set(struct aq_hw_s *self) /* QoS Rx buf size per TC */ tc = 0; - is_rx_flow_control = (AQ_NIC_FC_RX & self->aq_nic_cfg->flow_control); buff_size = HW_ATL_B0_RXBUF_MAX; hw_atl_rpb_rx_pkt_buff_size_per_tc_set(self, buff_size, tc); @@ -150,7 +154,8 @@ static int hw_atl_b0_hw_qos_set(struct aq_hw_s *self) (buff_size * (1024U / 32U) * 50U) / 100U, tc); - hw_atl_rpb_rx_xoff_en_per_tc_set(self, is_rx_flow_control ? 1U : 0U, tc); + + hw_atl_b0_set_fc(self, self->aq_nic_cfg->flow_control, tc); /* QoS 802.1p priority -> TC mapping */ for (i_priority = 8U; i_priority--;) @@ -963,4 +968,5 @@ const struct aq_hw_ops hw_atl_ops_b0 = { .hw_get_regs = hw_atl_utils_hw_get_regs, .hw_get_hw_stats = hw_atl_utils_get_hw_stats, .hw_get_fw_version = hw_atl_utils_get_fw_version, + .hw_set_fc = hw_atl_b0_set_fc, }; diff --git a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_utils_fw2x.c b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_utils_fw2x.c index 096ca5730887c..7de3220d9cab7 100644 --- a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_utils_fw2x.c +++ b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_utils_fw2x.c @@ -30,6 +30,8 @@ #define HW_ATL_FW2X_MPI_STATE_ADDR 0x370 #define HW_ATL_FW2X_MPI_STATE2_ADDR 0x374 +#define HW_ATL_FW2X_CAP_PAUSE BIT(CAPS_HI_PAUSE) +#define HW_ATL_FW2X_CAP_ASYM_PAUSE BIT(CAPS_HI_ASYMMETRIC_PAUSE) #define HW_ATL_FW2X_CAP_SLEEP_PROXY BIT(CAPS_HI_SLEEP_PROXY) #define HW_ATL_FW2X_CAP_WOL BIT(CAPS_HI_WOL) @@ -451,6 +453,24 @@ static int aq_fw2x_set_flow_control(struct aq_hw_s *self) return 0; } +static u32 aq_fw2x_get_flow_control(struct aq_hw_s *self, u32 *fcmode) +{ + u32 mpi_state = aq_hw_read_reg(self, HW_ATL_FW2X_MPI_STATE2_ADDR); + + if (mpi_state & HW_ATL_FW2X_CAP_PAUSE) + if (mpi_state & HW_ATL_FW2X_CAP_ASYM_PAUSE) + *fcmode = AQ_NIC_FC_RX; + else + *fcmode = AQ_NIC_FC_RX | AQ_NIC_FC_TX; + else + if (mpi_state & HW_ATL_FW2X_CAP_ASYM_PAUSE) + *fcmode = AQ_NIC_FC_TX; + else + *fcmode = 0; + + return 0; +} + const struct aq_fw_ops aq_fw_2x_ops = { .init = aq_fw2x_init, .deinit = aq_fw2x_deinit, @@ -465,4 +485,5 @@ const struct aq_fw_ops aq_fw_2x_ops = { .set_eee_rate = aq_fw2x_set_eee_rate, .get_eee_rate = aq_fw2x_get_eee_rate, .set_flow_control = aq_fw2x_set_flow_control, + .get_flow_control = aq_fw2x_get_flow_control }; From 7a1bb49461b12b2e6332a4d054256835f45203f3 Mon Sep 17 00:00:00 2001 From: Dmitry Bogdanov <dmitry.bogdanov@aquantia.com> Date: Fri, 9 Nov 2018 11:53:57 +0000 Subject: [PATCH 333/443] net: aquantia: fix potential IOMMU fault after driver unbind IOMMU fault may occurr on unbind/bind or if_down/if_up sequence. Although driver disables the rings on down, this is not enough. Due to internal HW design, during subsequent initialization NIC sometimes may reuse RX descriptors cache and write to the host memory from the descriptor cache. That's get catched by IOMMU on host. This patch invalidates the descriptor cache in NIC on interface down to prevent writing to the cached descriptors and to the memory pointed in those descriptors. Signed-off-by: Dmitry Bogdanov <dmitry.bogdanov@aquantia.com> Signed-off-by: Igor Russkikh <igor.russkikh@aquantia.com> Signed-off-by: David S. Miller <davem@davemloft.net> --- .../aquantia/atlantic/hw_atl/hw_atl_b0.c | 6 ++++++ .../aquantia/atlantic/hw_atl/hw_atl_llh.c | 8 ++++++++ .../aquantia/atlantic/hw_atl/hw_atl_llh.h | 3 +++ .../atlantic/hw_atl/hw_atl_llh_internal.h | 18 ++++++++++++++++++ 4 files changed, 35 insertions(+) diff --git a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0.c b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0.c index 119265762b0c4..3aec56623bf55 100644 --- a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0.c +++ b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0.c @@ -920,6 +920,12 @@ static int hw_atl_b0_hw_interrupt_moderation_set(struct aq_hw_s *self) static int hw_atl_b0_hw_stop(struct aq_hw_s *self) { hw_atl_b0_hw_irq_disable(self, HW_ATL_B0_INT_MASK); + + /* Invalidate Descriptor Cache to prevent writing to the cached + * descriptors and to the data pointer of those descriptors + */ + hw_atl_rdm_rx_dma_desc_cache_init_set(self, 1); + return aq_hw_err_from_flags(self); } diff --git a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_llh.c b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_llh.c index be0a3a90dfad6..5502ec5f0f699 100644 --- a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_llh.c +++ b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_llh.c @@ -619,6 +619,14 @@ void hw_atl_rpb_rx_flow_ctl_mode_set(struct aq_hw_s *aq_hw, u32 rx_flow_ctl_mode HW_ATL_RPB_RX_FC_MODE_SHIFT, rx_flow_ctl_mode); } +void hw_atl_rdm_rx_dma_desc_cache_init_set(struct aq_hw_s *aq_hw, u32 init) +{ + aq_hw_write_reg_bit(aq_hw, HW_ATL_RDM_RX_DMA_DESC_CACHE_INIT_ADR, + HW_ATL_RDM_RX_DMA_DESC_CACHE_INIT_MSK, + HW_ATL_RDM_RX_DMA_DESC_CACHE_INIT_SHIFT, + init); +} + void hw_atl_rpb_rx_pkt_buff_size_per_tc_set(struct aq_hw_s *aq_hw, u32 rx_pkt_buff_size_per_tc, u32 buffer) { diff --git a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_llh.h b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_llh.h index 7056c7342afcf..41f239928c157 100644 --- a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_llh.h +++ b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_llh.h @@ -325,6 +325,9 @@ void hw_atl_rpb_rx_pkt_buff_size_per_tc_set(struct aq_hw_s *aq_hw, u32 rx_pkt_buff_size_per_tc, u32 buffer); +/* set rdm rx dma descriptor cache init */ +void hw_atl_rdm_rx_dma_desc_cache_init_set(struct aq_hw_s *aq_hw, u32 init); + /* set rx xoff enable (per tc) */ void hw_atl_rpb_rx_xoff_en_per_tc_set(struct aq_hw_s *aq_hw, u32 rx_xoff_en_per_tc, u32 buffer); diff --git a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_llh_internal.h b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_llh_internal.h index 716674a9b729e..a715fa317b1c8 100644 --- a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_llh_internal.h +++ b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_llh_internal.h @@ -293,6 +293,24 @@ /* default value of bitfield desc{d}_reset */ #define HW_ATL_RDM_DESCDRESET_DEFAULT 0x0 +/* rdm_desc_init_i bitfield definitions + * preprocessor definitions for the bitfield rdm_desc_init_i. + * port="pif_rdm_desc_init_i" + */ + +/* register address for bitfield rdm_desc_init_i */ +#define HW_ATL_RDM_RX_DMA_DESC_CACHE_INIT_ADR 0x00005a00 +/* bitmask for bitfield rdm_desc_init_i */ +#define HW_ATL_RDM_RX_DMA_DESC_CACHE_INIT_MSK 0xffffffff +/* inverted bitmask for bitfield rdm_desc_init_i */ +#define HW_ATL_RDM_RX_DMA_DESC_CACHE_INIT_MSKN 0x00000000 +/* lower bit position of bitfield rdm_desc_init_i */ +#define HW_ATL_RDM_RX_DMA_DESC_CACHE_INIT_SHIFT 0 +/* width of bitfield rdm_desc_init_i */ +#define HW_ATL_RDM_RX_DMA_DESC_CACHE_INIT_WIDTH 32 +/* default value of bitfield rdm_desc_init_i */ +#define HW_ATL_RDM_RX_DMA_DESC_CACHE_INIT_DEFAULT 0x0 + /* rx int_desc_wrb_en bitfield definitions * preprocessor definitions for the bitfield "int_desc_wrb_en". * port="pif_rdm_int_desc_wrb_en_i" From bfaa9f8553d5c20703781e63f4fc8cb4792f18fd Mon Sep 17 00:00:00 2001 From: Igor Russkikh <Igor.Russkikh@aquantia.com> Date: Fri, 9 Nov 2018 11:53:59 +0000 Subject: [PATCH 334/443] net: aquantia: fixed enable unicast on 32 macvlan Fixed a condition mistake due to which macvlans unicast item number 32 was not added in the unicast filter. The consequence is that when exactly 32 macvlans are created on NIC, the last created macvlan receives no traffic because its MAC was not registered in HW. Fixes: 94b3b542303f ("net: aquantia: vlan unicast address list correct handling") Signed-off-by: Igor Russkikh <igor.russkikh@aquantia.com> Tested-by: Nikita Danilov <nikita.danilov@aquantia.com> Signed-off-by: David S. Miller <davem@davemloft.net> --- drivers/net/ethernet/aquantia/atlantic/aq_nic.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_nic.c b/drivers/net/ethernet/aquantia/atlantic/aq_nic.c index 0011a3f2f6727..b5e7c98f424ca 100644 --- a/drivers/net/ethernet/aquantia/atlantic/aq_nic.c +++ b/drivers/net/ethernet/aquantia/atlantic/aq_nic.c @@ -600,7 +600,7 @@ int aq_nic_set_multicast_list(struct aq_nic_s *self, struct net_device *ndev) } } - if (i > 0 && i < AQ_HW_MULTICAST_ADDRESS_MAX) { + if (i > 0 && i <= AQ_HW_MULTICAST_ADDRESS_MAX) { packet_filter |= IFF_MULTICAST; self->mc_list.count = i; self->aq_hw_ops->hw_multicast_list_set(self->aq_hw, From ad703c2b9127f9acdef697ec4755f6da4beaa266 Mon Sep 17 00:00:00 2001 From: Dmitry Bogdanov <dmitry.bogdanov@aquantia.com> Date: Fri, 9 Nov 2018 11:54:01 +0000 Subject: [PATCH 335/443] net: aquantia: invalid checksumm offload implementation Packets with marked invalid IP/UDP/TCP checksums were considered as good by the driver. The error was in a logic, processing offload bits in RX descriptor. Signed-off-by: Igor Russkikh <igor.russkikh@aquantia.com> Signed-off-by: Dmitry Bogdanov <dmitry.bogdanov@aquantia.com> Signed-off-by: David S. Miller <davem@davemloft.net> --- .../net/ethernet/aquantia/atlantic/aq_ring.c | 35 +++++++++++------- .../aquantia/atlantic/hw_atl/hw_atl_b0.c | 36 +++++++++---------- 2 files changed, 41 insertions(+), 30 deletions(-) diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_ring.c b/drivers/net/ethernet/aquantia/atlantic/aq_ring.c index 3db91446cc677..74550ccc7a20f 100644 --- a/drivers/net/ethernet/aquantia/atlantic/aq_ring.c +++ b/drivers/net/ethernet/aquantia/atlantic/aq_ring.c @@ -172,6 +172,27 @@ bool aq_ring_tx_clean(struct aq_ring_s *self) return !!budget; } +static void aq_rx_checksum(struct aq_ring_s *self, + struct aq_ring_buff_s *buff, + struct sk_buff *skb) +{ + if (!(self->aq_nic->ndev->features & NETIF_F_RXCSUM)) + return; + + if (unlikely(buff->is_cso_err)) { + ++self->stats.rx.errors; + skb->ip_summed = CHECKSUM_NONE; + return; + } + if (buff->is_ip_cso) { + __skb_incr_checksum_unnecessary(skb); + if (buff->is_udp_cso || buff->is_tcp_cso) + __skb_incr_checksum_unnecessary(skb); + } else { + skb->ip_summed = CHECKSUM_NONE; + } +} + #define AQ_SKB_ALIGN SKB_DATA_ALIGN(sizeof(struct skb_shared_info)) int aq_ring_rx_clean(struct aq_ring_s *self, struct napi_struct *napi, @@ -267,18 +288,8 @@ int aq_ring_rx_clean(struct aq_ring_s *self, } skb->protocol = eth_type_trans(skb, ndev); - if (unlikely(buff->is_cso_err)) { - ++self->stats.rx.errors; - skb->ip_summed = CHECKSUM_NONE; - } else { - if (buff->is_ip_cso) { - __skb_incr_checksum_unnecessary(skb); - if (buff->is_udp_cso || buff->is_tcp_cso) - __skb_incr_checksum_unnecessary(skb); - } else { - skb->ip_summed = CHECKSUM_NONE; - } - } + + aq_rx_checksum(self, buff, skb); skb_set_hash(skb, buff->rss_hash, buff->is_hash_l4 ? PKT_HASH_TYPE_L4 : diff --git a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0.c b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0.c index 3aec56623bf55..179ce12fe4d8d 100644 --- a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0.c +++ b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0.c @@ -660,9 +660,9 @@ static int hw_atl_b0_hw_ring_rx_receive(struct aq_hw_s *self, struct hw_atl_rxd_wb_s *rxd_wb = (struct hw_atl_rxd_wb_s *) &ring->dx_ring[ring->hw_head * HW_ATL_B0_RXD_SIZE]; - unsigned int is_err = 1U; unsigned int is_rx_check_sum_enabled = 0U; unsigned int pkt_type = 0U; + u8 rx_stat = 0U; if (!(rxd_wb->status & 0x1U)) { /* RxD is not done */ break; @@ -670,35 +670,35 @@ static int hw_atl_b0_hw_ring_rx_receive(struct aq_hw_s *self, buff = &ring->buff_ring[ring->hw_head]; - is_err = (0x0000003CU & rxd_wb->status); + rx_stat = (0x0000003CU & rxd_wb->status) >> 2; is_rx_check_sum_enabled = (rxd_wb->type) & (0x3U << 19); - is_err &= ~0x20U; /* exclude validity bit */ pkt_type = 0xFFU & (rxd_wb->type >> 4); - if (is_rx_check_sum_enabled) { - if (0x0U == (pkt_type & 0x3U)) - buff->is_ip_cso = (is_err & 0x08U) ? 0U : 1U; + if (is_rx_check_sum_enabled & BIT(0) && + (0x0U == (pkt_type & 0x3U))) + buff->is_ip_cso = (rx_stat & BIT(1)) ? 0U : 1U; + if (is_rx_check_sum_enabled & BIT(1)) { if (0x4U == (pkt_type & 0x1CU)) - buff->is_udp_cso = buff->is_cso_err ? 0U : 1U; + buff->is_udp_cso = (rx_stat & BIT(2)) ? 0U : + !!(rx_stat & BIT(3)); else if (0x0U == (pkt_type & 0x1CU)) - buff->is_tcp_cso = buff->is_cso_err ? 0U : 1U; - - /* Checksum offload workaround for small packets */ - if (rxd_wb->pkt_len <= 60) { - buff->is_ip_cso = 0U; - buff->is_cso_err = 0U; - } + buff->is_tcp_cso = (rx_stat & BIT(2)) ? 0U : + !!(rx_stat & BIT(3)); + } + buff->is_cso_err = !!(rx_stat & 0x6); + /* Checksum offload workaround for small packets */ + if (unlikely(rxd_wb->pkt_len <= 60)) { + buff->is_ip_cso = 0U; + buff->is_cso_err = 0U; } - - is_err &= ~0x18U; dma_unmap_page(ndev, buff->pa, buff->len, DMA_FROM_DEVICE); - if (is_err || rxd_wb->type & 0x1000U) { - /* status error or DMA error */ + if ((rx_stat & BIT(0)) || rxd_wb->type & 0x1000U) { + /* MAC error or DMA error */ buff->is_error = 1U; } else { if (self->aq_nic_cfg->is_rss) { From bbb67a44baf973da734b9fd61cba4211da240751 Mon Sep 17 00:00:00 2001 From: Dmitry Bogdanov <dmitry.bogdanov@aquantia.com> Date: Fri, 9 Nov 2018 11:54:03 +0000 Subject: [PATCH 336/443] net: aquantia: allow rx checksum offload configuration RX Checksum offloads could not be configured and ignored netdev features flag for checksumming. Signed-off-by: Igor Russkikh <igor.russkikh@aquantia.com> Signed-off-by: Dmitry Bogdanov <dmitry.bogdanov@aquantia.com> Signed-off-by: David S. Miller <davem@davemloft.net> --- drivers/net/ethernet/aquantia/atlantic/aq_hw.h | 3 +++ drivers/net/ethernet/aquantia/atlantic/aq_main.c | 10 ++++++++-- drivers/net/ethernet/aquantia/atlantic/aq_nic.c | 2 +- drivers/net/ethernet/aquantia/atlantic/aq_nic.h | 2 +- .../net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0.c | 7 +++++-- 5 files changed, 18 insertions(+), 6 deletions(-) diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_hw.h b/drivers/net/ethernet/aquantia/atlantic/aq_hw.h index 7ec8d24b2b0b8..a1e70da358ca6 100644 --- a/drivers/net/ethernet/aquantia/atlantic/aq_hw.h +++ b/drivers/net/ethernet/aquantia/atlantic/aq_hw.h @@ -204,6 +204,9 @@ struct aq_hw_ops { int (*hw_get_fw_version)(struct aq_hw_s *self, u32 *fw_version); + int (*hw_set_offload)(struct aq_hw_s *self, + struct aq_nic_cfg_s *aq_nic_cfg); + int (*hw_set_fc)(struct aq_hw_s *self, u32 fc, u32 tc); }; diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_main.c b/drivers/net/ethernet/aquantia/atlantic/aq_main.c index e3ae29e523f0e..7c07eef275eb8 100644 --- a/drivers/net/ethernet/aquantia/atlantic/aq_main.c +++ b/drivers/net/ethernet/aquantia/atlantic/aq_main.c @@ -99,8 +99,11 @@ static int aq_ndev_set_features(struct net_device *ndev, struct aq_nic_s *aq_nic = netdev_priv(ndev); struct aq_nic_cfg_s *aq_cfg = aq_nic_get_cfg(aq_nic); bool is_lro = false; + int err = 0; + + aq_cfg->features = features; - if (aq_cfg->hw_features & NETIF_F_LRO) { + if (aq_cfg->aq_hw_caps->hw_features & NETIF_F_LRO) { is_lro = features & NETIF_F_LRO; if (aq_cfg->is_lro != is_lro) { @@ -112,8 +115,11 @@ static int aq_ndev_set_features(struct net_device *ndev, } } } + if ((aq_nic->ndev->features ^ features) & NETIF_F_RXCSUM) + err = aq_nic->aq_hw_ops->hw_set_offload(aq_nic->aq_hw, + aq_cfg); - return 0; + return err; } static int aq_ndev_set_mac_address(struct net_device *ndev, void *addr) diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_nic.c b/drivers/net/ethernet/aquantia/atlantic/aq_nic.c index b5e7c98f424ca..7abdc09524259 100644 --- a/drivers/net/ethernet/aquantia/atlantic/aq_nic.c +++ b/drivers/net/ethernet/aquantia/atlantic/aq_nic.c @@ -118,7 +118,7 @@ void aq_nic_cfg_start(struct aq_nic_s *self) } cfg->link_speed_msk &= cfg->aq_hw_caps->link_speed_msk; - cfg->hw_features = cfg->aq_hw_caps->hw_features; + cfg->features = cfg->aq_hw_caps->hw_features; } static int aq_nic_update_link_status(struct aq_nic_s *self) diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_nic.h b/drivers/net/ethernet/aquantia/atlantic/aq_nic.h index c1582f4e8e1b5..44ec47a3d60a5 100644 --- a/drivers/net/ethernet/aquantia/atlantic/aq_nic.h +++ b/drivers/net/ethernet/aquantia/atlantic/aq_nic.h @@ -23,7 +23,7 @@ struct aq_vec_s; struct aq_nic_cfg_s { const struct aq_hw_caps_s *aq_hw_caps; - u64 hw_features; + u64 features; u32 rxds; /* rx ring size, descriptors # */ u32 txds; /* tx ring size, descriptors # */ u32 vecs; /* vecs==allocated irqs */ diff --git a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0.c b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0.c index 179ce12fe4d8d..f02592f43fe36 100644 --- a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0.c +++ b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0.c @@ -234,8 +234,10 @@ static int hw_atl_b0_hw_offload_set(struct aq_hw_s *self, hw_atl_tpo_tcp_udp_crc_offload_en_set(self, 1); /* RX checksums offloads*/ - hw_atl_rpo_ipv4header_crc_offload_en_set(self, 1); - hw_atl_rpo_tcp_udp_crc_offload_en_set(self, 1); + hw_atl_rpo_ipv4header_crc_offload_en_set(self, !!(aq_nic_cfg->features & + NETIF_F_RXCSUM)); + hw_atl_rpo_tcp_udp_crc_offload_en_set(self, !!(aq_nic_cfg->features & + NETIF_F_RXCSUM)); /* LSO offloads*/ hw_atl_tdm_large_send_offload_en_set(self, 0xFFFFFFFFU); @@ -974,5 +976,6 @@ const struct aq_hw_ops hw_atl_ops_b0 = { .hw_get_regs = hw_atl_utils_hw_get_regs, .hw_get_hw_stats = hw_atl_utils_get_hw_stats, .hw_get_fw_version = hw_atl_utils_get_fw_version, + .hw_set_offload = hw_atl_b0_hw_offload_set, .hw_set_fc = hw_atl_b0_set_fc, }; From d02854dc1999ed3e7fd79ec700c64ac23ac0c458 Mon Sep 17 00:00:00 2001 From: Subash Abhinov Kasiviswanathan <subashab@codeaurora.org> Date: Fri, 9 Nov 2018 18:56:27 -0700 Subject: [PATCH 337/443] net: qualcomm: rmnet: Fix incorrect assignment of real_dev A null dereference was observed when a sysctl was being set from userspace and rmnet was stuck trying to complete some actions in the NETDEV_REGISTER callback. This is because the real_dev is set only after the device registration handler completes. sysctl call stack - <6> Unable to handle kernel NULL pointer dereference at virtual address 00000108 <2> pc : rmnet_vnd_get_iflink+0x1c/0x28 <2> lr : dev_get_iflink+0x2c/0x40 <2> rmnet_vnd_get_iflink+0x1c/0x28 <2> inet6_fill_ifinfo+0x15c/0x234 <2> inet6_ifinfo_notify+0x68/0xd4 <2> ndisc_ifinfo_sysctl_change+0x1b8/0x234 <2> proc_sys_call_handler+0xac/0x100 <2> proc_sys_write+0x3c/0x4c <2> __vfs_write+0x54/0x14c <2> vfs_write+0xcc/0x188 <2> SyS_write+0x60/0xc0 <2> el0_svc_naked+0x34/0x38 device register call stack - <2> notifier_call_chain+0x84/0xbc <2> raw_notifier_call_chain+0x38/0x48 <2> call_netdevice_notifiers_info+0x40/0x70 <2> call_netdevice_notifiers+0x38/0x60 <2> register_netdevice+0x29c/0x3d8 <2> rmnet_vnd_newlink+0x68/0xe8 <2> rmnet_newlink+0xa0/0x160 <2> rtnl_newlink+0x57c/0x6c8 <2> rtnetlink_rcv_msg+0x1dc/0x328 <2> netlink_rcv_skb+0xac/0x118 <2> rtnetlink_rcv+0x24/0x30 <2> netlink_unicast+0x158/0x1f0 <2> netlink_sendmsg+0x32c/0x338 <2> sock_sendmsg+0x44/0x60 <2> SyS_sendto+0x150/0x1ac <2> el0_svc_naked+0x34/0x38 Fixes: b752eff5be24 ("net: qualcomm: rmnet: Implement ndo_get_iflink") Signed-off-by: Sean Tranchetti <stranche@codeaurora.org> Signed-off-by: Subash Abhinov Kasiviswanathan <subashab@codeaurora.org> Signed-off-by: David S. Miller <davem@davemloft.net> --- drivers/net/ethernet/qualcomm/rmnet/rmnet_vnd.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/qualcomm/rmnet/rmnet_vnd.c b/drivers/net/ethernet/qualcomm/rmnet/rmnet_vnd.c index 0afc3d335d562..d11c16aeb19ad 100644 --- a/drivers/net/ethernet/qualcomm/rmnet/rmnet_vnd.c +++ b/drivers/net/ethernet/qualcomm/rmnet/rmnet_vnd.c @@ -234,7 +234,7 @@ int rmnet_vnd_newlink(u8 id, struct net_device *rmnet_dev, struct net_device *real_dev, struct rmnet_endpoint *ep) { - struct rmnet_priv *priv; + struct rmnet_priv *priv = netdev_priv(rmnet_dev); int rc; if (ep->egress_dev) @@ -247,6 +247,8 @@ int rmnet_vnd_newlink(u8 id, struct net_device *rmnet_dev, rmnet_dev->hw_features |= NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM; rmnet_dev->hw_features |= NETIF_F_SG; + priv->real_dev = real_dev; + rc = register_netdevice(rmnet_dev); if (!rc) { ep->egress_dev = rmnet_dev; @@ -255,9 +257,7 @@ int rmnet_vnd_newlink(u8 id, struct net_device *rmnet_dev, rmnet_dev->rtnl_link_ops = &rmnet_link_ops; - priv = netdev_priv(rmnet_dev); priv->mux_id = id; - priv->real_dev = real_dev; netdev_dbg(rmnet_dev, "rmnet dev created\n"); } From 62230715fd2453b3ba948c9d83cfb3ada9169169 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=EB=B0=B0=EC=84=9D=EC=A7=84?= <soukjin.bae@samsung.com> Date: Fri, 9 Nov 2018 16:53:06 -0800 Subject: [PATCH 338/443] flow_dissector: do not dissect l4 ports for fragments MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Only first fragment has the sport/dport information, not the following ones. If we want consistent hash for all fragments, we need to ignore ports even for first fragment. This bug is visible for IPv6 traffic, if incoming fragments do not have a flow label, since skb_get_hash() will give different results for first fragment and following ones. It is also visible if any routing rule wants dissection and sport or dport. See commit 5e5d6fed3741 ("ipv6: route: dissect flow in input path if fib rules need it") for details. [edumazet] rewrote the changelog completely. Fixes: 06635a35d13d ("flow_dissect: use programable dissector in skb_flow_dissect and friends") Signed-off-by: 배석진 <soukjin.bae@samsung.com> Signed-off-by: Eric Dumazet <edumazet@google.com> Signed-off-by: David S. Miller <davem@davemloft.net> --- net/core/flow_dissector.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c index 676f3ad629f95..588f475019d47 100644 --- a/net/core/flow_dissector.c +++ b/net/core/flow_dissector.c @@ -1166,8 +1166,8 @@ bool __skb_flow_dissect(const struct sk_buff *skb, break; } - if (dissector_uses_key(flow_dissector, - FLOW_DISSECTOR_KEY_PORTS)) { + if (dissector_uses_key(flow_dissector, FLOW_DISSECTOR_KEY_PORTS) && + !(key_control->flags & FLOW_DIS_IS_FRAGMENT)) { key_ports = skb_flow_dissector_target(flow_dissector, FLOW_DISSECTOR_KEY_PORTS, target_container); From fbd1d5245372e48b494120a30fe0b34b304576c4 Mon Sep 17 00:00:00 2001 From: Alexandre Belloni <alexandre.belloni@bootlin.com> Date: Fri, 9 Nov 2018 17:37:20 +0100 Subject: [PATCH 339/443] net: mvneta: correct typo The reserved variable should be named reserved1. Signed-off-by: Alexandre Belloni <alexandre.belloni@bootlin.com> Signed-off-by: David S. Miller <davem@davemloft.net> --- drivers/net/ethernet/marvell/mvneta.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/marvell/mvneta.c b/drivers/net/ethernet/marvell/mvneta.c index 5bfd349bf41ac..3ba672e9e353d 100644 --- a/drivers/net/ethernet/marvell/mvneta.c +++ b/drivers/net/ethernet/marvell/mvneta.c @@ -494,7 +494,7 @@ struct mvneta_port { #if defined(__LITTLE_ENDIAN) struct mvneta_tx_desc { u32 command; /* Options used by HW for packet transmitting.*/ - u16 reserverd1; /* csum_l4 (for future use) */ + u16 reserved1; /* csum_l4 (for future use) */ u16 data_size; /* Data size of transmitted packet in bytes */ u32 buf_phys_addr; /* Physical addr of transmitted buffer */ u32 reserved2; /* hw_cmd - (for future use, PMT) */ @@ -519,7 +519,7 @@ struct mvneta_rx_desc { #else struct mvneta_tx_desc { u16 data_size; /* Data size of transmitted packet in bytes */ - u16 reserverd1; /* csum_l4 (for future use) */ + u16 reserved1; /* csum_l4 (for future use) */ u32 command; /* Options used by HW for packet transmitting.*/ u32 reserved2; /* hw_cmd - (for future use, PMT) */ u32 buf_phys_addr; /* Physical addr of transmitted buffer */ From de7b75d82f70c5469675b99ad632983c50b6f7e7 Mon Sep 17 00:00:00 2001 From: Jens Axboe <axboe@kernel.dk> Date: Fri, 9 Nov 2018 15:58:40 -0700 Subject: [PATCH 340/443] floppy: fix race condition in __floppy_read_block_0() LKP recently reported a hang at bootup in the floppy code: [ 245.678853] INFO: task mount:580 blocked for more than 120 seconds. [ 245.679906] Tainted: G T 4.19.0-rc6-00172-ga9f38e1 #1 [ 245.680959] "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message. [ 245.682181] mount D 6372 580 1 0x00000004 [ 245.683023] Call Trace: [ 245.683425] __schedule+0x2df/0x570 [ 245.683975] schedule+0x2d/0x80 [ 245.684476] schedule_timeout+0x19d/0x330 [ 245.685090] ? wait_for_common+0xa5/0x170 [ 245.685735] wait_for_common+0xac/0x170 [ 245.686339] ? do_sched_yield+0x90/0x90 [ 245.686935] wait_for_completion+0x12/0x20 [ 245.687571] __floppy_read_block_0+0xfb/0x150 [ 245.688244] ? floppy_resume+0x40/0x40 [ 245.688844] floppy_revalidate+0x20f/0x240 [ 245.689486] check_disk_change+0x43/0x60 [ 245.690087] floppy_open+0x1ea/0x360 [ 245.690653] __blkdev_get+0xb4/0x4d0 [ 245.691212] ? blkdev_get+0x1db/0x370 [ 245.691777] blkdev_get+0x1f3/0x370 [ 245.692351] ? path_put+0x15/0x20 [ 245.692871] ? lookup_bdev+0x4b/0x90 [ 245.693539] blkdev_get_by_path+0x3d/0x80 [ 245.694165] mount_bdev+0x2a/0x190 [ 245.694695] squashfs_mount+0x10/0x20 [ 245.695271] ? squashfs_alloc_inode+0x30/0x30 [ 245.695960] mount_fs+0xf/0x90 [ 245.696451] vfs_kern_mount+0x43/0x130 [ 245.697036] do_mount+0x187/0xc40 [ 245.697563] ? memdup_user+0x28/0x50 [ 245.698124] ksys_mount+0x60/0xc0 [ 245.698639] sys_mount+0x19/0x20 [ 245.699167] do_int80_syscall_32+0x61/0x130 [ 245.699813] entry_INT80_32+0xc7/0xc7 showing that we never complete that read request. The reason is that the completion setup is racy - it initializes the completion event AFTER submitting the IO, which means that the IO could complete before/during the init. If it does, we are passing garbage to complete() and we may sleep forever waiting for the event to occur. Fixes: 7b7b68bba5ef ("floppy: bail out in open() if drive is not responding to block0 read") Reviewed-by: Omar Sandoval <osandov@fb.com> Signed-off-by: Jens Axboe <axboe@kernel.dk> --- drivers/block/floppy.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/block/floppy.c b/drivers/block/floppy.c index a8cfa011c2848..fb23578e9a416 100644 --- a/drivers/block/floppy.c +++ b/drivers/block/floppy.c @@ -4148,10 +4148,11 @@ static int __floppy_read_block_0(struct block_device *bdev, int drive) bio.bi_end_io = floppy_rb0_cb; bio_set_op_attrs(&bio, REQ_OP_READ, 0); + init_completion(&cbdata.complete); + submit_bio(&bio); process_fd_request(); - init_completion(&cbdata.complete); wait_for_completion(&cbdata.complete); __free_page(page); From 3fa58dcab50a0aa16817f16a8d38aee869eb3fb9 Mon Sep 17 00:00:00 2001 From: Dan Williams <dan.j.williams@intel.com> Date: Thu, 1 Nov 2018 00:30:22 -0700 Subject: [PATCH 341/443] acpi, nfit: Fix ARS overflow continuation When the platform BIOS is unable to report all the media error records it requires the OS to restart the scrub at a prescribed location. The driver detects the overflow condition, but then fails to report it to the ARS state machine after reaping the records. Propagate -ENOSPC correctly to continue the ARS operation. Cc: <stable@vger.kernel.org> Fixes: 1cf03c00e7c1 ("nfit: scrub and register regions in a workqueue") Reported-by: Jacek Zloch <jacek.zloch@intel.com> Reviewed-by: Dave Jiang <dave.jiang@intel.com> Signed-off-by: Dan Williams <dan.j.williams@intel.com> --- drivers/acpi/nfit/core.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/acpi/nfit/core.c b/drivers/acpi/nfit/core.c index f8c638f3c946d..5970b8f5f7680 100644 --- a/drivers/acpi/nfit/core.c +++ b/drivers/acpi/nfit/core.c @@ -2928,9 +2928,9 @@ static int acpi_nfit_query_poison(struct acpi_nfit_desc *acpi_desc) return rc; if (ars_status_process_records(acpi_desc)) - return -ENOMEM; + dev_err(acpi_desc->dev, "Failed to process ARS records\n"); - return 0; + return rc; } static int ars_register(struct acpi_nfit_desc *acpi_desc, From 2121db09630113e67b51ae78c18115f1858f648a Mon Sep 17 00:00:00 2001 From: Dan Williams <dan.j.williams@intel.com> Date: Sat, 3 Nov 2018 17:53:09 -0700 Subject: [PATCH 342/443] Revert "acpi, nfit: Further restrict userspace ARS start requests" The following lockdep splat results from acquiring the init_mutex in acpi_nfit_clear_to_send(): WARNING: possible circular locking dependency detected lt-daxdev-error/7216 is trying to acquire lock: 00000000f694db15 (&acpi_desc->init_mutex){+.+.}, at: acpi_nfit_clear_to_send+0x27/0x80 [nfit] but task is already holding lock: 00000000182298f2 (&nvdimm_bus->reconfig_mutex){+.+.}, at: __nd_ioctl+0x457/0x610 [libnvdimm] which lock already depends on the new lock. the existing dependency chain (in reverse order) is: -> #1 (&nvdimm_bus->reconfig_mutex){+.+.}: nvdimm_badblocks_populate+0x41/0x150 [libnvdimm] nd_region_notify+0x95/0xb0 [libnvdimm] nd_device_notify+0x40/0x50 [libnvdimm] ars_complete+0x7f/0xd0 [nfit] acpi_nfit_scrub+0xbb/0x410 [nfit] process_one_work+0x22b/0x5c0 worker_thread+0x3c/0x390 kthread+0x11e/0x140 ret_from_fork+0x3a/0x50 -> #0 (&acpi_desc->init_mutex){+.+.}: __mutex_lock+0x83/0x980 acpi_nfit_clear_to_send+0x27/0x80 [nfit] __nd_ioctl+0x474/0x610 [libnvdimm] nd_ioctl+0xa4/0xb0 [libnvdimm] do_vfs_ioctl+0xa5/0x6e0 ksys_ioctl+0x70/0x80 __x64_sys_ioctl+0x16/0x20 do_syscall_64+0x60/0x210 entry_SYSCALL_64_after_hwframe+0x49/0xbe New infrastructure is needed to be able to perform this check without acquiring the lock. Fixes: 594861215c83 ("acpi, nfit: Further restrict userspace ARS start") Cc: Dave Jiang <dave.jiang@intel.com> Signed-off-by: Dan Williams <dan.j.williams@intel.com> --- drivers/acpi/nfit/core.c | 15 +++------------ 1 file changed, 3 insertions(+), 12 deletions(-) diff --git a/drivers/acpi/nfit/core.c b/drivers/acpi/nfit/core.c index 5970b8f5f7680..14d9f5bea0151 100644 --- a/drivers/acpi/nfit/core.c +++ b/drivers/acpi/nfit/core.c @@ -3341,8 +3341,6 @@ static int acpi_nfit_clear_to_send(struct nvdimm_bus_descriptor *nd_desc, struct nvdimm *nvdimm, unsigned int cmd) { struct acpi_nfit_desc *acpi_desc = to_acpi_nfit_desc(nd_desc); - struct nfit_spa *nfit_spa; - int rc = 0; if (nvdimm) return 0; @@ -3355,17 +3353,10 @@ static int acpi_nfit_clear_to_send(struct nvdimm_bus_descriptor *nd_desc, * just needs guarantees that any ARS it initiates are not * interrupted by any intervening start requests from userspace. */ - mutex_lock(&acpi_desc->init_mutex); - list_for_each_entry(nfit_spa, &acpi_desc->spas, list) - if (acpi_desc->scrub_spa - || test_bit(ARS_REQ_SHORT, &nfit_spa->ars_state) - || test_bit(ARS_REQ_LONG, &nfit_spa->ars_state)) { - rc = -EBUSY; - break; - } - mutex_unlock(&acpi_desc->init_mutex); + if (work_busy(&acpi_desc->dwork.work)) + return -EBUSY; - return rc; + return 0; } int acpi_nfit_ars_rescan(struct acpi_nfit_desc *acpi_desc, From 63c82997f5c0f3e1b914af43d82f712a86bc5f3a Mon Sep 17 00:00:00 2001 From: Jakub Kicinski <jakub.kicinski@netronome.com> Date: Fri, 9 Nov 2018 21:06:26 -0800 Subject: [PATCH 343/443] net: sched: cls_flower: validate nested enc_opts_policy to avoid warning MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit TCA_FLOWER_KEY_ENC_OPTS and TCA_FLOWER_KEY_ENC_OPTS_MASK can only currently contain further nested attributes, which are parsed by hand, so the policy is never actually used resulting in a W=1 build warning: net/sched/cls_flower.c:492:1: warning: ‘enc_opts_policy’ defined but not used [-Wunused-const-variable=] enc_opts_policy[TCA_FLOWER_KEY_ENC_OPTS_MAX + 1] = { Add the validation anyway to avoid potential bugs when other attributes are added and to make the attribute structure slightly more clear. Validation will also set extact to point to bad attribute on error. Fixes: 0a6e77784f49 ("net/sched: allow flower to match tunnel options") Signed-off-by: Jakub Kicinski <jakub.kicinski@netronome.com> Acked-by: Simon Horman <simon.horman@netronome.com> Acked-by: Jiri Pirko <jiri@mellanox.com> Signed-off-by: David S. Miller <davem@davemloft.net> --- net/sched/cls_flower.c | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/net/sched/cls_flower.c b/net/sched/cls_flower.c index 9aada2d0ef065..c6c327874abcc 100644 --- a/net/sched/cls_flower.c +++ b/net/sched/cls_flower.c @@ -709,11 +709,23 @@ static int fl_set_enc_opt(struct nlattr **tb, struct fl_flow_key *key, struct netlink_ext_ack *extack) { const struct nlattr *nla_enc_key, *nla_opt_key, *nla_opt_msk = NULL; - int option_len, key_depth, msk_depth = 0; + int err, option_len, key_depth, msk_depth = 0; + + err = nla_validate_nested(tb[TCA_FLOWER_KEY_ENC_OPTS], + TCA_FLOWER_KEY_ENC_OPTS_MAX, + enc_opts_policy, extack); + if (err) + return err; nla_enc_key = nla_data(tb[TCA_FLOWER_KEY_ENC_OPTS]); if (tb[TCA_FLOWER_KEY_ENC_OPTS_MASK]) { + err = nla_validate_nested(tb[TCA_FLOWER_KEY_ENC_OPTS_MASK], + TCA_FLOWER_KEY_ENC_OPTS_MAX, + enc_opts_policy, extack); + if (err) + return err; + nla_opt_msk = nla_data(tb[TCA_FLOWER_KEY_ENC_OPTS_MASK]); msk_depth = nla_len(tb[TCA_FLOWER_KEY_ENC_OPTS_MASK]); } From 6bbe4385d035c6fac56f840a59861a0310ce137b Mon Sep 17 00:00:00 2001 From: Masahiro Yamada <yamada.masahiro@socionext.com> Date: Mon, 5 Nov 2018 17:19:36 +0900 Subject: [PATCH 344/443] kconfig: merge_config: avoid false positive matches from comment lines The current SED_CONFIG_EXP could match to comment lines in config fragment files, especially when CONFIG_PREFIX_ is empty. For example, Buildroot uses empty prefixing; starting symbols with BR2_ is just convention. Make the sed expression more robust against false positives from comment lines. The new sed expression matches to only valid patterns. Signed-off-by: Masahiro Yamada <yamada.masahiro@socionext.com> Reviewed-by: Petr Vorel <petr.vorel@gmail.com> Reviewed-by: Arnout Vandecappelle (Essensium/Mind) <arnout@mind.be> --- scripts/kconfig/merge_config.sh | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/scripts/kconfig/merge_config.sh b/scripts/kconfig/merge_config.sh index da66e7742282a..0ef906499646b 100755 --- a/scripts/kconfig/merge_config.sh +++ b/scripts/kconfig/merge_config.sh @@ -102,7 +102,8 @@ if [ ! -r "$INITFILE" ]; then fi MERGE_LIST=$* -SED_CONFIG_EXP="s/^\(# \)\{0,1\}\(${CONFIG_PREFIX}[a-zA-Z0-9_]*\)[= ].*/\2/p" +SED_CONFIG_EXP1="s/^\(${CONFIG_PREFIX}[a-zA-Z0-9_]*\)=.*/\1/p" +SED_CONFIG_EXP2="s/^# \(${CONFIG_PREFIX}[a-zA-Z0-9_]*\) is not set$/\1/p" TMP_FILE=$(mktemp ./.tmp.config.XXXXXXXXXX) @@ -116,7 +117,7 @@ for MERGE_FILE in $MERGE_LIST ; do echo "The merge file '$MERGE_FILE' does not exist. Exit." >&2 exit 1 fi - CFG_LIST=$(sed -n "$SED_CONFIG_EXP" $MERGE_FILE) + CFG_LIST=$(sed -n -e "$SED_CONFIG_EXP1" -e "$SED_CONFIG_EXP2" $MERGE_FILE) for CFG in $CFG_LIST ; do grep -q -w $CFG $TMP_FILE || continue @@ -159,7 +160,7 @@ make KCONFIG_ALLCONFIG=$TMP_FILE $OUTPUT_ARG $ALLTARGET # Check all specified config values took (might have missed-dependency issues) -for CFG in $(sed -n "$SED_CONFIG_EXP" $TMP_FILE); do +for CFG in $(sed -n -e "$SED_CONFIG_EXP1" -e "$SED_CONFIG_EXP2" $TMP_FILE); do REQUESTED_VAL=$(grep -w -e "$CFG" $TMP_FILE) ACTUAL_VAL=$(grep -w -e "$CFG" "$KCONFIG_CONFIG") From bbcde0a7241261cd0ca8d8e6b94a4113a4b71443 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada <yamada.masahiro@socionext.com> Date: Tue, 6 Nov 2018 13:18:05 +0900 Subject: [PATCH 345/443] kbuild: deb-pkg: fix too low build version number Since commit b41d920acff8 ("kbuild: deb-pkg: split generating packaging and build"), the build version of the kernel contained in a deb package is too low by 1. Prior to the bad commit, the kernel was built first, then the number in .version file was read out, and written into the debian control file. Now, the debian control file is created before the kernel is actually compiled, which is causing the version number mismatch. Let the mkdebian script pass KBUILD_BUILD_VERSION=${revision} to require the build system to use the specified version number. Fixes: b41d920acff8 ("kbuild: deb-pkg: split generating packaging and build") Reported-by: Doug Smythies <dsmythies@telus.net> Signed-off-by: Masahiro Yamada <yamada.masahiro@socionext.com> Tested-by: Doug Smythies <dsmythies@telus.net> --- scripts/package/mkdebian | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/scripts/package/mkdebian b/scripts/package/mkdebian index 663a7f343b42c..edcad61fe3cda 100755 --- a/scripts/package/mkdebian +++ b/scripts/package/mkdebian @@ -88,6 +88,7 @@ set_debarch() { version=$KERNELRELEASE if [ -n "$KDEB_PKGVERSION" ]; then packageversion=$KDEB_PKGVERSION + revision=${packageversion##*-} else revision=$(cat .version 2>/dev/null||echo 1) packageversion=$version-$revision @@ -205,10 +206,12 @@ cat <<EOF > debian/rules #!$(command -v $MAKE) -f build: - \$(MAKE) KERNELRELEASE=${version} ARCH=${ARCH} KBUILD_SRC= + \$(MAKE) KERNELRELEASE=${version} ARCH=${ARCH} \ + KBUILD_BUILD_VERSION=${revision} KBUILD_SRC= binary-arch: - \$(MAKE) KERNELRELEASE=${version} ARCH=${ARCH} KBUILD_SRC= intdeb-pkg + \$(MAKE) KERNELRELEASE=${version} ARCH=${ARCH} \ + KBUILD_BUILD_VERSION=${revision} KBUILD_SRC= intdeb-pkg clean: rm -rf debian/*tmp debian/files From 8ef14c2c41d962756d314f1d7dc972b0ea7a180f Mon Sep 17 00:00:00 2001 From: Guenter Roeck <linux@roeck-us.net> Date: Tue, 6 Nov 2018 10:10:38 -0800 Subject: [PATCH 346/443] Revert "scripts/setlocalversion: git: Make -dirty check more robust" This reverts commit 6147b1cf19651c7de297e69108b141fb30aa2349. The reverted patch results in attempted write access to the source repository, even if that repository is mounted read-only. Output from "strace git status -uno --porcelain": getcwd("/tmp/linux-test", 129) = 16 open("/tmp/linux-test/.git/index.lock", O_RDWR|O_CREAT|O_EXCL|O_CLOEXEC, 0666) = -1 EROFS (Read-only file system) While git appears to be able to handle this situation, a monitored build environment (such as the one used for Chrome OS kernel builds) may detect it and bail out with an access violation error. On top of that, the attempted write access suggests that git _will_ write to the file even if a build output directory is specified. Users may have the reasonable expectation that the source repository remains untouched in that situation. Fixes: 6147b1cf19651 ("scripts/setlocalversion: git: Make -dirty check more robust" Cc: Genki Sky <sky@genki.is> Signed-off-by: Guenter Roeck <linux@roeck-us.net> Reviewed-by: Brian Norris <briannorris@chromium.org> Signed-off-by: Masahiro Yamada <yamada.masahiro@socionext.com> --- scripts/setlocalversion | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/setlocalversion b/scripts/setlocalversion index 79f7dd57d571e..71f39410691b6 100755 --- a/scripts/setlocalversion +++ b/scripts/setlocalversion @@ -74,7 +74,7 @@ scm_version() fi # Check for uncommitted changes - if git status -uno --porcelain | grep -qv '^.. scripts/package'; then + if git diff-index --name-only HEAD | grep -qv "^scripts/package"; then printf '%s' -dirty fi From d5615e472d23e854e96192103b6ae7977e705f01 Mon Sep 17 00:00:00 2001 From: Rob Herring <robh@kernel.org> Date: Wed, 7 Nov 2018 08:36:46 -0600 Subject: [PATCH 347/443] builddeb: Fix inclusion of dtbs in debian package MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Commit 37c8a5fafa3b ("kbuild: consolidate Devicetree dtb build rules") moved the location of 'dtbs_install' target which caused dtbs to not be installed when building debian package with 'bindeb-pkg' target. Update the builddeb script to use the same logic that determines if there's a 'dtbs_install' target which is presence of the arch dts directory. Also, use CONFIG_OF_EARLY_FLATTREE instead of CONFIG_OF as that's a better indication of whether we are building dtbs. This commit will also have the side effect of installing dtbs on any arch that has dts files. Previously, it was dependent on whether the arch defined 'dtbs_install'. Fixes: 37c8a5fafa3b ("kbuild: consolidate Devicetree dtb build rules") Reported-by: Nuno Gonçalves <nunojpg@gmail.com> Signed-off-by: Rob Herring <robh@kernel.org> Signed-off-by: Masahiro Yamada <yamada.masahiro@socionext.com> --- scripts/package/builddeb | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/scripts/package/builddeb b/scripts/package/builddeb index 0b31f4f1f92cf..f43a274f4f1d5 100755 --- a/scripts/package/builddeb +++ b/scripts/package/builddeb @@ -83,9 +83,9 @@ else fi cp "$($MAKE -s -f $srctree/Makefile image_name)" "$tmpdir/$installed_image_path" -if grep -q "^CONFIG_OF=y" $KCONFIG_CONFIG ; then +if grep -q "^CONFIG_OF_EARLY_FLATTREE=y" $KCONFIG_CONFIG ; then # Only some architectures with OF support have this target - if grep -q dtbs_install "${srctree}/arch/$SRCARCH/Makefile"; then + if [ -d "${srctree}/arch/$SRCARCH/boot/dts" ]; then $MAKE KBUILD_SRC= INSTALL_DTBS_PATH="$tmpdir/usr/lib/$packagename" dtbs_install fi fi From 7ab412d33b4c7ff3e0148d3db25dd861edd1283d Mon Sep 17 00:00:00 2001 From: Jon Maloy <donmalo99@gmail.com> Date: Sat, 10 Nov 2018 17:30:24 -0500 Subject: [PATCH 348/443] tipc: fix link re-establish failure When a link failure is detected locally, the link is reset, the flag link->in_session is set to false, and a RESET_MSG with the 'stopping' bit set is sent to the peer. The purpose of this bit is to inform the peer that this endpoint just is going down, and that the peer should handle the reception of this particular RESET message as a local failure. This forces the peer to accept another RESET or ACTIVATE message from this endpoint before it can re-establish the link. This again is necessary to ensure that link session numbers are properly exchanged before the link comes up again. If a failure is detected locally at the same time at the peer endpoint this will do the same, which is also a correct behavior. However, when receiving such messages, the endpoints will not distinguish between 'stopping' RESETs and ordinary ones when it comes to updating session numbers. Both endpoints will copy the received session number and set their 'in_session' flags to true at the reception, while they are still expecting another RESET from the peer before they can go ahead and re-establish. This is contradictory, since, after applying the validation check referred to below, the 'in_session' flag will cause rejection of all such messages, and the link will never come up again. We now fix this by not only handling received RESET/STOPPING messages as a local failure, but also by omitting to set a new session number and the 'in_session' flag in such cases. Fixes: 7ea817f4e832 ("tipc: check session number before accepting link protocol messages") Signed-off-by: Jon Maloy <jon.maloy@ericsson.com> Signed-off-by: David S. Miller <davem@davemloft.net> --- net/tipc/link.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/net/tipc/link.c b/net/tipc/link.c index 201c3b5bc96be..836727e363c46 100644 --- a/net/tipc/link.c +++ b/net/tipc/link.c @@ -1594,14 +1594,17 @@ static int tipc_link_proto_rcv(struct tipc_link *l, struct sk_buff *skb, if (in_range(peers_prio, l->priority + 1, TIPC_MAX_LINK_PRI)) l->priority = peers_prio; - /* ACTIVATE_MSG serves as PEER_RESET if link is already down */ - if (msg_peer_stopping(hdr)) + /* If peer is going down we want full re-establish cycle */ + if (msg_peer_stopping(hdr)) { rc = tipc_link_fsm_evt(l, LINK_FAILURE_EVT); - else if ((mtyp == RESET_MSG) || !link_is_up(l)) + break; + } + /* ACTIVATE_MSG serves as PEER_RESET if link is already down */ + if (mtyp == RESET_MSG || !link_is_up(l)) rc = tipc_link_fsm_evt(l, LINK_PEER_RESET_EVT); /* ACTIVATE_MSG takes up link if it was already locally reset */ - if ((mtyp == ACTIVATE_MSG) && (l->state == LINK_ESTABLISHING)) + if (mtyp == ACTIVATE_MSG && l->state == LINK_ESTABLISHING) rc = TIPC_LINK_UP_EVT; l->peer_session = msg_session(hdr); From a9049ff9214da68df1179a7d5e36b43479abc9b8 Mon Sep 17 00:00:00 2001 From: Andrew Lunn <andrew@lunn.ch> Date: Sun, 11 Nov 2018 00:41:10 +0100 Subject: [PATCH 349/443] net: dsa: mv88e6xxx: Fix clearing of stats counters The mv88e6161 would sometime fail to probe with a timeout waiting for the switch to complete an operation. This operation is supposed to clear the statistics counters. However, due to a read/modify/write, without the needed mask, the operation actually carried out was more random, with invalid parameters, resulting in the switch not responding. We need to preserve the histogram mode bits, so apply a mask to keep them. Reported-by: Chris Healy <Chris.Healy@zii.aero> Fixes: 40cff8fca9e3 ("net: dsa: mv88e6xxx: Fix stats histogram mode") Signed-off-by: Andrew Lunn <andrew@lunn.ch> Signed-off-by: David S. Miller <davem@davemloft.net> --- drivers/net/dsa/mv88e6xxx/global1.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/dsa/mv88e6xxx/global1.c b/drivers/net/dsa/mv88e6xxx/global1.c index d721ccf7d8bed..38e399e0f30e1 100644 --- a/drivers/net/dsa/mv88e6xxx/global1.c +++ b/drivers/net/dsa/mv88e6xxx/global1.c @@ -567,6 +567,8 @@ int mv88e6xxx_g1_stats_clear(struct mv88e6xxx_chip *chip) if (err) return err; + /* Keep the histogram mode bits */ + val &= MV88E6XXX_G1_STATS_OP_HIST_RX_TX; val |= MV88E6XXX_G1_STATS_OP_BUSY | MV88E6XXX_G1_STATS_OP_FLUSH_ALL; err = mv88e6xxx_g1_write(chip, MV88E6XXX_G1_STATS_OP, val); From 7236ead1b14923f3ba35cd29cce13246be83f451 Mon Sep 17 00:00:00 2001 From: Eric Dumazet <edumazet@google.com> Date: Sat, 10 Nov 2018 16:22:29 -0800 Subject: [PATCH 350/443] act_mirred: clear skb->tstamp on redirect If sch_fq is used at ingress, skbs that might have been timestamped by net_timestamp_set() if a packet capture is requesting timestamps could be delayed by arbitrary amount of time, since sch_fq time base is MONOTONIC. Fix this problem by moving code from sch_netem.c to act_mirred.c. Fixes: fb420d5d91c1 ("tcp/fq: move back to CLOCK_MONOTONIC") Signed-off-by: Eric Dumazet <edumazet@google.com> Signed-off-by: David S. Miller <davem@davemloft.net> --- net/sched/act_mirred.c | 3 ++- net/sched/sch_netem.c | 9 --------- 2 files changed, 2 insertions(+), 10 deletions(-) diff --git a/net/sched/act_mirred.c b/net/sched/act_mirred.c index 1dae5f2b358fc..c8cf4d10c4355 100644 --- a/net/sched/act_mirred.c +++ b/net/sched/act_mirred.c @@ -258,7 +258,8 @@ static int tcf_mirred_act(struct sk_buff *skb, const struct tc_action *a, if (is_redirect) { skb2->tc_redirected = 1; skb2->tc_from_ingress = skb2->tc_at_ingress; - + if (skb2->tc_from_ingress) + skb2->tstamp = 0; /* let's the caller reinsert the packet, if possible */ if (use_reinsert) { res->ingress = want_ingress; diff --git a/net/sched/sch_netem.c b/net/sched/sch_netem.c index 57b3ad9394ad7..2c38e3d079246 100644 --- a/net/sched/sch_netem.c +++ b/net/sched/sch_netem.c @@ -648,15 +648,6 @@ static struct sk_buff *netem_dequeue(struct Qdisc *sch) */ skb->dev = qdisc_dev(sch); -#ifdef CONFIG_NET_CLS_ACT - /* - * If it's at ingress let's pretend the delay is - * from the network (tstamp will be updated). - */ - if (skb->tc_redirected && skb->tc_from_ingress) - skb->tstamp = 0; -#endif - if (q->slot.slot_next) { q->slot.packets_left--; q->slot.bytes_left -= qdisc_pkt_len(skb); From ccda4af0f4b92f7b4c308d3acc262f4a7e3affad Mon Sep 17 00:00:00 2001 From: Linus Torvalds <torvalds@linux-foundation.org> Date: Sun, 11 Nov 2018 17:12:31 -0600 Subject: [PATCH 351/443] Linux 4.20-rc2 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 9fce8b91c15f6..2f36db8978953 100644 --- a/Makefile +++ b/Makefile @@ -2,7 +2,7 @@ VERSION = 4 PATCHLEVEL = 20 SUBLEVEL = 0 -EXTRAVERSION = -rc1 +EXTRAVERSION = -rc2 NAME = "People's Front" # *DOCUMENTATION* From c8b00bb742dd036388f37d019dbb9db177f3e66c Mon Sep 17 00:00:00 2001 From: Michael Ellerman <mpe@ellerman.id.au> Date: Thu, 1 Nov 2018 16:21:05 +1100 Subject: [PATCH 352/443] powerpc/mm/64s: Fix preempt warning in slb_allocate_kernel() With preempt enabled we see warnings in do_slb_fault(): BUG: using smp_processor_id() in preemptible [00000000] code: kworker/u33:0/98 futex hash table entries: 4096 (order: 3, 524288 bytes) caller is do_slb_fault+0x204/0x230 CPU: 5 PID: 98 Comm: kworker/u33:0 Not tainted 4.19.0-rc3-gcc-7.3.1-00022-g1936f094e164 #138 Call Trace: dump_stack+0xb4/0x104 (unreliable) check_preemption_disabled+0x148/0x150 do_slb_fault+0x204/0x230 data_access_slb_common+0x138/0x180 This is caused by the get_paca() in slb_allocate_kernel(), which includes a call to debug_smp_processor_id(). slb_allocate_kernel() can only be called from do_slb_fault(), and in that path interrupts are hard disabled and so we can't be preempted, but we can't update the preempt flags (in thread_info) because that could cause an SLB fault. So just use local_paca which is safe and doesn't cause the warning. Fixes: 48e7b7695745 ("powerpc/64s/hash: Convert SLB miss handlers to C") Signed-off-by: Michael Ellerman <mpe@ellerman.id.au> --- arch/powerpc/mm/slb.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/powerpc/mm/slb.c b/arch/powerpc/mm/slb.c index b663a36f9ada1..bc3914d54e26e 100644 --- a/arch/powerpc/mm/slb.c +++ b/arch/powerpc/mm/slb.c @@ -708,7 +708,7 @@ static long slb_allocate_kernel(unsigned long ea, unsigned long id) return -EFAULT; if (ea < H_VMALLOC_END) - flags = get_paca()->vmalloc_sllp; + flags = local_paca->vmalloc_sllp; else flags = SLB_VSID_KERNEL | mmu_psize_defs[mmu_io_psize].sllp; } else { From 43c6494fa1499912c8177e71450c0279041152a6 Mon Sep 17 00:00:00 2001 From: Michael Ellerman <mpe@ellerman.id.au> Date: Tue, 6 Nov 2018 23:37:58 +1100 Subject: [PATCH 353/443] powerpc/io: Fix the IO workarounds code to work with Radix Back in 2006 Ben added some workarounds for a misbehaviour in the Spider IO bridge used on early Cell machines, see commit 014da7ff47b5 ("[POWERPC] Cell "Spider" MMIO workarounds"). Later these were made to be generic, ie. not tied specifically to Spider. The code stashes a token in the high bits (59-48) of virtual addresses used for IO (eg. returned from ioremap()). This works fine when using the Hash MMU, but when we're using the Radix MMU the bits used for the token overlap with some of the bits of the virtual address. This is because the maximum virtual address is larger with Radix, up to c00fffffffffffff, and in fact we use that high part of the address range for ioremap(), see RADIX_KERN_IO_START. As it happens the bits that are used overlap with the bits that differentiate an IO address vs a linear map address. If the resulting address lies outside the linear mapping we will crash (see below), if not we just corrupt memory. virtio-pci 0000:00:00.0: Using 64-bit direct DMA at offset 800000000000000 Unable to handle kernel paging request for data at address 0xc000000080000014 ... CFAR: c000000000626b98 DAR: c000000080000014 DSISR: 42000000 IRQMASK: 0 GPR00: c0000000006c54fc c00000003e523378 c0000000016de600 0000000000000000 GPR04: c00c000080000014 0000000000000007 0fffffff000affff 0000000000000030 ^^^^ ... NIP [c000000000626c5c] .iowrite8+0xec/0x100 LR [c0000000006c992c] .vp_reset+0x2c/0x90 Call Trace: .pci_bus_read_config_dword+0xc4/0x120 (unreliable) .register_virtio_device+0x13c/0x1c0 .virtio_pci_probe+0x148/0x1f0 .local_pci_probe+0x68/0x140 .pci_device_probe+0x164/0x220 .really_probe+0x274/0x3b0 .driver_probe_device+0x80/0x170 .__driver_attach+0x14c/0x150 .bus_for_each_dev+0xb8/0x130 .driver_attach+0x34/0x50 .bus_add_driver+0x178/0x2f0 .driver_register+0x90/0x1a0 .__pci_register_driver+0x6c/0x90 .virtio_pci_driver_init+0x2c/0x40 .do_one_initcall+0x64/0x280 .kernel_init_freeable+0x36c/0x474 .kernel_init+0x24/0x160 .ret_from_kernel_thread+0x58/0x7c This hasn't been a problem because CONFIG_PPC_IO_WORKAROUNDS which enables this code is usually not enabled. It is only enabled when it's selected by PPC_CELL_NATIVE which is only selected by PPC_IBM_CELL_BLADE and that in turn depends on BIG_ENDIAN. So in order to hit the bug you need to build a big endian kernel, with IBM Cell Blade support enabled, as well as Radix MMU support, and then boot that on Power9 using Radix MMU. Still we can fix the bug, so let's do that. We simply use fewer bits for the token, taking the union of the restrictions on the address from both Hash and Radix, we end up with 8 bits we can use for the token. The only user of the token is iowa_mem_find_bus() which only supports 8 token values, so 8 bits is plenty for that. Fixes: 566ca99af026 ("powerpc/mm/radix: Add dummy radix_enabled()") Signed-off-by: Michael Ellerman <mpe@ellerman.id.au> --- arch/powerpc/include/asm/io.h | 20 +++++++------------- 1 file changed, 7 insertions(+), 13 deletions(-) diff --git a/arch/powerpc/include/asm/io.h b/arch/powerpc/include/asm/io.h index 3ef40b703c4ab..e746becd9d6ff 100644 --- a/arch/powerpc/include/asm/io.h +++ b/arch/powerpc/include/asm/io.h @@ -268,19 +268,13 @@ extern void _memcpy_toio(volatile void __iomem *dest, const void *src, * their hooks, a bitfield is reserved for use by the platform near the * top of MMIO addresses (not PIO, those have to cope the hard way). * - * This bit field is 12 bits and is at the top of the IO virtual - * addresses PCI_IO_INDIRECT_TOKEN_MASK. + * The highest address in the kernel virtual space are: * - * The kernel virtual space is thus: + * d0003fffffffffff # with Hash MMU + * c00fffffffffffff # with Radix MMU * - * 0xD000000000000000 : vmalloc - * 0xD000080000000000 : PCI PHB IO space - * 0xD000080080000000 : ioremap - * 0xD0000fffffffffff : end of ioremap region - * - * Since the top 4 bits are reserved as the region ID, we use thus - * the next 12 bits and keep 4 bits available for the future if the - * virtual address space is ever to be extended. + * The top 4 bits are reserved as the region ID on hash, leaving us 8 bits + * that can be used for the field. * * The direct IO mapping operations will then mask off those bits * before doing the actual access, though that only happen when @@ -292,8 +286,8 @@ extern void _memcpy_toio(volatile void __iomem *dest, const void *src, */ #ifdef CONFIG_PPC_INDIRECT_MMIO -#define PCI_IO_IND_TOKEN_MASK 0x0fff000000000000ul -#define PCI_IO_IND_TOKEN_SHIFT 48 +#define PCI_IO_IND_TOKEN_SHIFT 52 +#define PCI_IO_IND_TOKEN_MASK (0xfful << PCI_IO_IND_TOKEN_SHIFT) #define PCI_FIX_ADDR(addr) \ ((PCI_IO_ADDR)(((unsigned long)(addr)) & ~PCI_IO_IND_TOKEN_MASK)) #define PCI_GET_ADDR_TOKEN(addr) \ From 2c7645b0f7d1014f2636393de7906c6bfd25939f Mon Sep 17 00:00:00 2001 From: Michael Ellerman <mpe@ellerman.id.au> Date: Mon, 12 Nov 2018 13:46:06 +1100 Subject: [PATCH 354/443] selftests/powerpc: Fix wild_bctr test to work on ppc64 The selftest I recently added to test branching to an out-of-bounds NIP doesn't work on 64-bit big endian. It does fail but not in the right way. That is it SEGVs trying to load from the opd at BAD_NIP, but it never gets as far as branching to BAD_NIP. To fix it we need to create an opd which is reachable but which holds the bad address. Fixes: b7683fc66eba ("selftests/powerpc: Add a test of wild bctr") Signed-off-by: Michael Ellerman <mpe@ellerman.id.au> --- tools/testing/selftests/powerpc/mm/wild_bctr.c | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/tools/testing/selftests/powerpc/mm/wild_bctr.c b/tools/testing/selftests/powerpc/mm/wild_bctr.c index 1b0e9e9a2ddce..90469a9e49d40 100644 --- a/tools/testing/selftests/powerpc/mm/wild_bctr.c +++ b/tools/testing/selftests/powerpc/mm/wild_bctr.c @@ -105,6 +105,20 @@ static void dump_regs(void) } } +#ifdef _CALL_AIXDESC +struct opd { + unsigned long ip; + unsigned long toc; + unsigned long env; +}; +static struct opd bad_opd = { + .ip = BAD_NIP, +}; +#define BAD_FUNC (&bad_opd) +#else +#define BAD_FUNC BAD_NIP +#endif + int test_wild_bctr(void) { int (*func_ptr)(void); @@ -133,7 +147,7 @@ int test_wild_bctr(void) poison_regs(); - func_ptr = (int (*)(void))BAD_NIP; + func_ptr = (int (*)(void))BAD_FUNC; func_ptr(); FAIL_IF(1); /* we didn't segv? */ From c469933e772132aad040bd6a2adc8edf9ad6f825 Mon Sep 17 00:00:00 2001 From: Patrick Bellasi <patrick.bellasi@arm.com> Date: Mon, 5 Nov 2018 14:53:58 +0000 Subject: [PATCH 355/443] sched/fair: Fix cpu_util_wake() for 'execl' type workloads A ~10% regression has been reported for UnixBench's execl throughput test by Aaron Lu and Ye Xiaolong: https://lkml.org/lkml/2018/10/30/765 That test is pretty simple, it does a "recursive" execve() syscall on the same binary. Starting from the syscall, this sequence is possible: do_execve() do_execveat_common() __do_execve_file() sched_exec() select_task_rq_fair() <==| Task already enqueued find_idlest_cpu() find_idlest_group() capacity_spare_wake() <==| Functions not called from cpu_util_wake() | the wakeup path which means we can end up calling cpu_util_wake() not only from the "wakeup path", as its name would suggest. Indeed, the task doing an execve() syscall is already enqueued on the CPU we want to get the cpu_util_wake() for. The estimated utilization for a CPU computed in cpu_util_wake() was written under the assumption that function can be called only from the wakeup path. If instead the task is already enqueued, we end up with a utilization which does not remove the current task's contribution from the estimated utilization of the CPU. This will wrongly assume a reduced spare capacity on the current CPU and increase the chances to migrate the task on execve. The regression is tracked down to: commit d519329f72a6 ("sched/fair: Update util_est only on util_avg updates") because in that patch we turn on by default the UTIL_EST sched feature. However, the real issue is introduced by: commit f9be3e5961c5 ("sched/fair: Use util_est in LB and WU paths") Let's fix this by ensuring to always discount the task estimated utilization from the CPU's estimated utilization when the task is also the current one. The same benchmark of the bug report, executed on a dual socket 40 CPUs Intel(R) Xeon(R) CPU E5-2690 v2 @ 3.00GHz machine, reports these "Execl Throughput" figures (higher the better): mainline : 48136.5 lps mainline+fix : 55376.5 lps which correspond to a 15% speedup. Moreover, since {cpu_util,capacity_spare}_wake() are not really only used from the wakeup path, let's remove this ambiguity by using a better matching name: {cpu_util,capacity_spare}_without(). Since we are at that, let's also improve the existing documentation. Reported-by: Aaron Lu <aaron.lu@intel.com> Reported-by: Ye Xiaolong <xiaolong.ye@intel.com> Tested-by: Aaron Lu <aaron.lu@intel.com> Signed-off-by: Patrick Bellasi <patrick.bellasi@arm.com> Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org> Cc: Dietmar Eggemann <dietmar.eggemann@arm.com> Cc: Juri Lelli <juri.lelli@redhat.com> Cc: Linus Torvalds <torvalds@linux-foundation.org> Cc: Morten Rasmussen <morten.rasmussen@arm.com> Cc: Peter Zijlstra <peterz@infradead.org> Cc: Quentin Perret <quentin.perret@arm.com> Cc: Steve Muckle <smuckle@google.com> Cc: Suren Baghdasaryan <surenb@google.com> Cc: Thomas Gleixner <tglx@linutronix.de> Cc: Todd Kjos <tkjos@google.com> Cc: Vincent Guittot <vincent.guittot@linaro.org> Fixes: f9be3e5961c5 (sched/fair: Use util_est in LB and WU paths) Link: https://lore.kernel.org/lkml/20181025093100.GB13236@e110439-lin/ Signed-off-by: Ingo Molnar <mingo@kernel.org> --- kernel/sched/fair.c | 62 +++++++++++++++++++++++++++++++++++---------- 1 file changed, 48 insertions(+), 14 deletions(-) diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index 3648d0300fdf9..ac855b2f47746 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -5674,11 +5674,11 @@ static int wake_affine(struct sched_domain *sd, struct task_struct *p, return target; } -static unsigned long cpu_util_wake(int cpu, struct task_struct *p); +static unsigned long cpu_util_without(int cpu, struct task_struct *p); -static unsigned long capacity_spare_wake(int cpu, struct task_struct *p) +static unsigned long capacity_spare_without(int cpu, struct task_struct *p) { - return max_t(long, capacity_of(cpu) - cpu_util_wake(cpu, p), 0); + return max_t(long, capacity_of(cpu) - cpu_util_without(cpu, p), 0); } /* @@ -5738,7 +5738,7 @@ find_idlest_group(struct sched_domain *sd, struct task_struct *p, avg_load += cfs_rq_load_avg(&cpu_rq(i)->cfs); - spare_cap = capacity_spare_wake(i, p); + spare_cap = capacity_spare_without(i, p); if (spare_cap > max_spare_cap) max_spare_cap = spare_cap; @@ -5889,8 +5889,8 @@ static inline int find_idlest_cpu(struct sched_domain *sd, struct task_struct *p return prev_cpu; /* - * We need task's util for capacity_spare_wake, sync it up to prev_cpu's - * last_update_time. + * We need task's util for capacity_spare_without, sync it up to + * prev_cpu's last_update_time. */ if (!(sd_flag & SD_BALANCE_FORK)) sync_entity_load_avg(&p->se); @@ -6216,10 +6216,19 @@ static inline unsigned long cpu_util(int cpu) } /* - * cpu_util_wake: Compute CPU utilization with any contributions from - * the waking task p removed. + * cpu_util_without: compute cpu utilization without any contributions from *p + * @cpu: the CPU which utilization is requested + * @p: the task which utilization should be discounted + * + * The utilization of a CPU is defined by the utilization of tasks currently + * enqueued on that CPU as well as tasks which are currently sleeping after an + * execution on that CPU. + * + * This method returns the utilization of the specified CPU by discounting the + * utilization of the specified task, whenever the task is currently + * contributing to the CPU utilization. */ -static unsigned long cpu_util_wake(int cpu, struct task_struct *p) +static unsigned long cpu_util_without(int cpu, struct task_struct *p) { struct cfs_rq *cfs_rq; unsigned int util; @@ -6231,7 +6240,7 @@ static unsigned long cpu_util_wake(int cpu, struct task_struct *p) cfs_rq = &cpu_rq(cpu)->cfs; util = READ_ONCE(cfs_rq->avg.util_avg); - /* Discount task's blocked util from CPU's util */ + /* Discount task's util from CPU's util */ util -= min_t(unsigned int, util, task_util(p)); /* @@ -6240,14 +6249,14 @@ static unsigned long cpu_util_wake(int cpu, struct task_struct *p) * a) if *p is the only task sleeping on this CPU, then: * cpu_util (== task_util) > util_est (== 0) * and thus we return: - * cpu_util_wake = (cpu_util - task_util) = 0 + * cpu_util_without = (cpu_util - task_util) = 0 * * b) if other tasks are SLEEPING on this CPU, which is now exiting * IDLE, then: * cpu_util >= task_util * cpu_util > util_est (== 0) * and thus we discount *p's blocked utilization to return: - * cpu_util_wake = (cpu_util - task_util) >= 0 + * cpu_util_without = (cpu_util - task_util) >= 0 * * c) if other tasks are RUNNABLE on that CPU and * util_est > cpu_util @@ -6260,8 +6269,33 @@ static unsigned long cpu_util_wake(int cpu, struct task_struct *p) * covered by the following code when estimated utilization is * enabled. */ - if (sched_feat(UTIL_EST)) - util = max(util, READ_ONCE(cfs_rq->avg.util_est.enqueued)); + if (sched_feat(UTIL_EST)) { + unsigned int estimated = + READ_ONCE(cfs_rq->avg.util_est.enqueued); + + /* + * Despite the following checks we still have a small window + * for a possible race, when an execl's select_task_rq_fair() + * races with LB's detach_task(): + * + * detach_task() + * p->on_rq = TASK_ON_RQ_MIGRATING; + * ---------------------------------- A + * deactivate_task() \ + * dequeue_task() + RaceTime + * util_est_dequeue() / + * ---------------------------------- B + * + * The additional check on "current == p" it's required to + * properly fix the execl regression and it helps in further + * reducing the chances for the above race. + */ + if (unlikely(task_on_rq_queued(p) || current == p)) { + estimated -= min_t(unsigned int, estimated, + (_task_util_est(p) | UTIL_AVG_UNCHANGED)); + } + util = max(util, estimated); + } /* * Utilization (estimated) can exceed the CPU capacity, thus let's From c10a8de0d32e95b0b8c7c17b6dc09baea5a5a899 Mon Sep 17 00:00:00 2001 From: Kan Liang <kan.liang@linux.intel.com> Date: Fri, 19 Oct 2018 10:04:18 -0700 Subject: [PATCH 356/443] perf/x86/intel/uncore: Add more IMC PCI IDs for KabyLake and CoffeeLake CPUs KabyLake and CoffeeLake CPUs have the same client uncore events as SkyLake. Add the PCI IDs for the KabyLake Y, U, S processor lines and CoffeeLake U, H, S processor lines. Signed-off-by: Kan Liang <kan.liang@linux.intel.com> Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org> Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com> Cc: Arnaldo Carvalho de Melo <acme@redhat.com> Cc: Jiri Olsa <jolsa@redhat.com> Cc: Linus Torvalds <torvalds@linux-foundation.org> Cc: Peter Zijlstra <peterz@infradead.org> Cc: Stephane Eranian <eranian@google.com> Cc: Thomas Gleixner <tglx@linutronix.de> Cc: Vince Weaver <vincent.weaver@maine.edu> Link: http://lkml.kernel.org/r/20181019170419.378-1-kan.liang@linux.intel.com Signed-off-by: Ingo Molnar <mingo@kernel.org> --- arch/x86/events/intel/uncore_snb.c | 115 ++++++++++++++++++++++++++++- 1 file changed, 114 insertions(+), 1 deletion(-) diff --git a/arch/x86/events/intel/uncore_snb.c b/arch/x86/events/intel/uncore_snb.c index 8527c3e1038b7..bfa25814fe5f2 100644 --- a/arch/x86/events/intel/uncore_snb.c +++ b/arch/x86/events/intel/uncore_snb.c @@ -15,6 +15,25 @@ #define PCI_DEVICE_ID_INTEL_SKL_HQ_IMC 0x1910 #define PCI_DEVICE_ID_INTEL_SKL_SD_IMC 0x190f #define PCI_DEVICE_ID_INTEL_SKL_SQ_IMC 0x191f +#define PCI_DEVICE_ID_INTEL_KBL_Y_IMC 0x590c +#define PCI_DEVICE_ID_INTEL_KBL_U_IMC 0x5904 +#define PCI_DEVICE_ID_INTEL_KBL_UQ_IMC 0x5914 +#define PCI_DEVICE_ID_INTEL_KBL_SD_IMC 0x590f +#define PCI_DEVICE_ID_INTEL_KBL_SQ_IMC 0x591f +#define PCI_DEVICE_ID_INTEL_CFL_2U_IMC 0x3ecc +#define PCI_DEVICE_ID_INTEL_CFL_4U_IMC 0x3ed0 +#define PCI_DEVICE_ID_INTEL_CFL_4H_IMC 0x3e10 +#define PCI_DEVICE_ID_INTEL_CFL_6H_IMC 0x3ec4 +#define PCI_DEVICE_ID_INTEL_CFL_2S_D_IMC 0x3e0f +#define PCI_DEVICE_ID_INTEL_CFL_4S_D_IMC 0x3e1f +#define PCI_DEVICE_ID_INTEL_CFL_6S_D_IMC 0x3ec2 +#define PCI_DEVICE_ID_INTEL_CFL_8S_D_IMC 0x3e30 +#define PCI_DEVICE_ID_INTEL_CFL_4S_W_IMC 0x3e18 +#define PCI_DEVICE_ID_INTEL_CFL_6S_W_IMC 0x3ec6 +#define PCI_DEVICE_ID_INTEL_CFL_8S_W_IMC 0x3e31 +#define PCI_DEVICE_ID_INTEL_CFL_4S_S_IMC 0x3e33 +#define PCI_DEVICE_ID_INTEL_CFL_6S_S_IMC 0x3eca +#define PCI_DEVICE_ID_INTEL_CFL_8S_S_IMC 0x3e32 /* SNB event control */ #define SNB_UNC_CTL_EV_SEL_MASK 0x000000ff @@ -569,7 +588,82 @@ static const struct pci_device_id skl_uncore_pci_ids[] = { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_SKL_SQ_IMC), .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0), }, - + { /* IMC */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_KBL_Y_IMC), + .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0), + }, + { /* IMC */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_KBL_U_IMC), + .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0), + }, + { /* IMC */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_KBL_UQ_IMC), + .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0), + }, + { /* IMC */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_KBL_SD_IMC), + .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0), + }, + { /* IMC */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_KBL_SQ_IMC), + .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0), + }, + { /* IMC */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_CFL_2U_IMC), + .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0), + }, + { /* IMC */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_CFL_4U_IMC), + .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0), + }, + { /* IMC */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_CFL_4H_IMC), + .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0), + }, + { /* IMC */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_CFL_6H_IMC), + .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0), + }, + { /* IMC */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_CFL_2S_D_IMC), + .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0), + }, + { /* IMC */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_CFL_4S_D_IMC), + .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0), + }, + { /* IMC */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_CFL_6S_D_IMC), + .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0), + }, + { /* IMC */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_CFL_8S_D_IMC), + .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0), + }, + { /* IMC */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_CFL_4S_W_IMC), + .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0), + }, + { /* IMC */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_CFL_6S_W_IMC), + .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0), + }, + { /* IMC */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_CFL_8S_W_IMC), + .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0), + }, + { /* IMC */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_CFL_4S_S_IMC), + .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0), + }, + { /* IMC */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_CFL_6S_S_IMC), + .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0), + }, + { /* IMC */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_CFL_8S_S_IMC), + .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0), + }, { /* end: all zeroes */ }, }; @@ -618,6 +712,25 @@ static const struct imc_uncore_pci_dev desktop_imc_pci_ids[] = { IMC_DEV(SKL_HQ_IMC, &skl_uncore_pci_driver), /* 6th Gen Core H Quad Core */ IMC_DEV(SKL_SD_IMC, &skl_uncore_pci_driver), /* 6th Gen Core S Dual Core */ IMC_DEV(SKL_SQ_IMC, &skl_uncore_pci_driver), /* 6th Gen Core S Quad Core */ + IMC_DEV(KBL_Y_IMC, &skl_uncore_pci_driver), /* 7th Gen Core Y */ + IMC_DEV(KBL_U_IMC, &skl_uncore_pci_driver), /* 7th Gen Core U */ + IMC_DEV(KBL_UQ_IMC, &skl_uncore_pci_driver), /* 7th Gen Core U Quad Core */ + IMC_DEV(KBL_SD_IMC, &skl_uncore_pci_driver), /* 7th Gen Core S Dual Core */ + IMC_DEV(KBL_SQ_IMC, &skl_uncore_pci_driver), /* 7th Gen Core S Quad Core */ + IMC_DEV(CFL_2U_IMC, &skl_uncore_pci_driver), /* 8th Gen Core U 2 Cores */ + IMC_DEV(CFL_4U_IMC, &skl_uncore_pci_driver), /* 8th Gen Core U 4 Cores */ + IMC_DEV(CFL_4H_IMC, &skl_uncore_pci_driver), /* 8th Gen Core H 4 Cores */ + IMC_DEV(CFL_6H_IMC, &skl_uncore_pci_driver), /* 8th Gen Core H 6 Cores */ + IMC_DEV(CFL_2S_D_IMC, &skl_uncore_pci_driver), /* 8th Gen Core S 2 Cores Desktop */ + IMC_DEV(CFL_4S_D_IMC, &skl_uncore_pci_driver), /* 8th Gen Core S 4 Cores Desktop */ + IMC_DEV(CFL_6S_D_IMC, &skl_uncore_pci_driver), /* 8th Gen Core S 6 Cores Desktop */ + IMC_DEV(CFL_8S_D_IMC, &skl_uncore_pci_driver), /* 8th Gen Core S 8 Cores Desktop */ + IMC_DEV(CFL_4S_W_IMC, &skl_uncore_pci_driver), /* 8th Gen Core S 4 Cores Work Station */ + IMC_DEV(CFL_6S_W_IMC, &skl_uncore_pci_driver), /* 8th Gen Core S 6 Cores Work Station */ + IMC_DEV(CFL_8S_W_IMC, &skl_uncore_pci_driver), /* 8th Gen Core S 8 Cores Work Station */ + IMC_DEV(CFL_4S_S_IMC, &skl_uncore_pci_driver), /* 8th Gen Core S 4 Cores Server */ + IMC_DEV(CFL_6S_S_IMC, &skl_uncore_pci_driver), /* 8th Gen Core S 6 Cores Server */ + IMC_DEV(CFL_8S_S_IMC, &skl_uncore_pci_driver), /* 8th Gen Core S 8 Cores Server */ { /* end marker */ } }; From 4d47d6407ac7b4b442a4e717488a3bb137398b6c Mon Sep 17 00:00:00 2001 From: Kan Liang <kan.liang@linux.intel.com> Date: Fri, 19 Oct 2018 10:04:19 -0700 Subject: [PATCH 357/443] perf/x86/intel/uncore: Support CoffeeLake 8th CBOX Coffee Lake has 8 core products which has 8 Cboxes. The 8th CBOX is mapped into different MSR space. Increase the num_boxes to 8 to handle the new products. It will not impact the previous platforms, SkyLake, KabyLake and earlier CoffeeLake. Because the num_boxes will be recalculated in uncore_cpu_init and doesn't exceed the x86_max_cores. Introduce a new box flag bit to indicate the 8th CBOX. Signed-off-by: Kan Liang <kan.liang@linux.intel.com> Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org> Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com> Cc: Arnaldo Carvalho de Melo <acme@redhat.com> Cc: Jiri Olsa <jolsa@redhat.com> Cc: Linus Torvalds <torvalds@linux-foundation.org> Cc: Peter Zijlstra <peterz@infradead.org> Cc: Stephane Eranian <eranian@google.com> Cc: Thomas Gleixner <tglx@linutronix.de> Cc: Vince Weaver <vincent.weaver@maine.edu> Link: http://lkml.kernel.org/r/20181019170419.378-2-kan.liang@linux.intel.com Signed-off-by: Ingo Molnar <mingo@kernel.org> --- arch/x86/events/intel/uncore.h | 33 ++++++++++++++++++++++-------- arch/x86/events/intel/uncore_snb.c | 6 +++++- 2 files changed, 30 insertions(+), 9 deletions(-) diff --git a/arch/x86/events/intel/uncore.h b/arch/x86/events/intel/uncore.h index e17ab885b1e92..cb46d602a6b8b 100644 --- a/arch/x86/events/intel/uncore.h +++ b/arch/x86/events/intel/uncore.h @@ -129,8 +129,15 @@ struct intel_uncore_box { struct intel_uncore_extra_reg shared_regs[0]; }; -#define UNCORE_BOX_FLAG_INITIATED 0 -#define UNCORE_BOX_FLAG_CTL_OFFS8 1 /* event config registers are 8-byte apart */ +/* CFL uncore 8th cbox MSRs */ +#define CFL_UNC_CBO_7_PERFEVTSEL0 0xf70 +#define CFL_UNC_CBO_7_PER_CTR0 0xf76 + +#define UNCORE_BOX_FLAG_INITIATED 0 +/* event config registers are 8-byte apart */ +#define UNCORE_BOX_FLAG_CTL_OFFS8 1 +/* CFL 8th CBOX has different MSR space */ +#define UNCORE_BOX_FLAG_CFL8_CBOX_MSR_OFFS 2 struct uncore_event_desc { struct kobj_attribute attr; @@ -297,17 +304,27 @@ unsigned int uncore_freerunning_counter(struct intel_uncore_box *box, static inline unsigned uncore_msr_event_ctl(struct intel_uncore_box *box, int idx) { - return box->pmu->type->event_ctl + - (box->pmu->type->pair_ctr_ctl ? 2 * idx : idx) + - uncore_msr_box_offset(box); + if (test_bit(UNCORE_BOX_FLAG_CFL8_CBOX_MSR_OFFS, &box->flags)) { + return CFL_UNC_CBO_7_PERFEVTSEL0 + + (box->pmu->type->pair_ctr_ctl ? 2 * idx : idx); + } else { + return box->pmu->type->event_ctl + + (box->pmu->type->pair_ctr_ctl ? 2 * idx : idx) + + uncore_msr_box_offset(box); + } } static inline unsigned uncore_msr_perf_ctr(struct intel_uncore_box *box, int idx) { - return box->pmu->type->perf_ctr + - (box->pmu->type->pair_ctr_ctl ? 2 * idx : idx) + - uncore_msr_box_offset(box); + if (test_bit(UNCORE_BOX_FLAG_CFL8_CBOX_MSR_OFFS, &box->flags)) { + return CFL_UNC_CBO_7_PER_CTR0 + + (box->pmu->type->pair_ctr_ctl ? 2 * idx : idx); + } else { + return box->pmu->type->perf_ctr + + (box->pmu->type->pair_ctr_ctl ? 2 * idx : idx) + + uncore_msr_box_offset(box); + } } static inline diff --git a/arch/x86/events/intel/uncore_snb.c b/arch/x86/events/intel/uncore_snb.c index bfa25814fe5f2..2593b0d7aeee6 100644 --- a/arch/x86/events/intel/uncore_snb.c +++ b/arch/x86/events/intel/uncore_snb.c @@ -221,6 +221,10 @@ static void skl_uncore_msr_init_box(struct intel_uncore_box *box) wrmsrl(SKL_UNC_PERF_GLOBAL_CTL, SNB_UNC_GLOBAL_CTL_EN | SKL_UNC_GLOBAL_CTL_CORE_ALL); } + + /* The 8th CBOX has different MSR space */ + if (box->pmu->pmu_idx == 7) + __set_bit(UNCORE_BOX_FLAG_CFL8_CBOX_MSR_OFFS, &box->flags); } static void skl_uncore_msr_enable_box(struct intel_uncore_box *box) @@ -247,7 +251,7 @@ static struct intel_uncore_ops skl_uncore_msr_ops = { static struct intel_uncore_type skl_uncore_cbox = { .name = "cbox", .num_counters = 4, - .num_boxes = 5, + .num_boxes = 8, .perf_ctr_bits = 44, .fixed_ctr_bits = 48, .perf_ctr = SNB_UNC_CBO_0_PER_CTR0, From 1e9c75fb9c47a75a9aec0cd17db5f6dc36b58e00 Mon Sep 17 00:00:00 2001 From: Benjamin Coddington <bcodding@redhat.com> Date: Wed, 3 Oct 2018 10:18:33 -0400 Subject: [PATCH 358/443] mnt: fix __detach_mounts infinite loop Since commit ff17fa561a04 ("d_invalidate(): unhash immediately") immediately unhashes the dentry, we'll never return the mountpoint in lookup_mountpoint(), which can lead to an unbreakable loop in d_invalidate(). I have reports of NFS clients getting into this condition after the server removes an export of an existing mount created through follow_automount(), but I suspect there are various other ways to produce this problem if we hunt down users of d_invalidate(). For example, it is possible to get into this state by using XFS' d_invalidate() call in xfs_vn_unlink(): truncate -s 100m img{1,2} mkfs.xfs -q -n version=ci img1 mkfs.xfs -q -n version=ci img2 mkdir -p /mnt/xfs mount img1 /mnt/xfs mkdir /mnt/xfs/sub1 mount img2 /mnt/xfs/sub1 cat > /mnt/xfs/sub1/foo & umount -l /mnt/xfs/sub1 mount img2 /mnt/xfs/sub1 mount --make-private /mnt/xfs mkdir /mnt/xfs/sub2 mount --move /mnt/xfs/sub1 /mnt/xfs/sub2 rmdir /mnt/xfs/sub1 Fix this by moving the check for an unlinked dentry out of the detach_mounts() path. Fixes: ff17fa561a04 ("d_invalidate(): unhash immediately") Cc: stable@vger.kernel.org Reviewed-by: "Eric W. Biederman" <ebiederm@xmission.com> Signed-off-by: Benjamin Coddington <bcodding@redhat.com> Signed-off-by: Eric W. Biederman <ebiederm@xmission.com> --- fs/namespace.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/fs/namespace.c b/fs/namespace.c index 74f64294a4108..a7f91265ea671 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -695,9 +695,6 @@ static struct mountpoint *lookup_mountpoint(struct dentry *dentry) hlist_for_each_entry(mp, chain, m_hash) { if (mp->m_dentry == dentry) { - /* might be worth a WARN_ON() */ - if (d_unlinked(dentry)) - return ERR_PTR(-ENOENT); mp->m_count++; return mp; } @@ -711,6 +708,9 @@ static struct mountpoint *get_mountpoint(struct dentry *dentry) int ret; if (d_mountpoint(dentry)) { + /* might be worth a WARN_ON() */ + if (d_unlinked(dentry)) + return ERR_PTR(-ENOENT); mountpoint: read_seqlock_excl(&mount_lock); mp = lookup_mountpoint(dentry); From e0c827aca0730b51f38081aa4e8ecf0912aab55f Mon Sep 17 00:00:00 2001 From: Laurent Pinchart <laurent.pinchart@ideasonboard.com> Date: Sat, 10 Nov 2018 13:16:51 +0200 Subject: [PATCH 359/443] drm/omap: Populate DSS children in omapdss driver The DSS DT node contains children that describe the DSS components (DISPC and internal encoders). Each of those components is handled by a platform driver, and thus needs to be backed by a platform device. The corresponding platform devices are created in mach-omap2 code by a call to of_platform_populate(). While this approach has worked so far, it doesn't model the hardware architecture very well, as it creates child devices before the parent is ready to handle them. This would be akin to creating I2C slaves before the I2C master is available. The task can be easily performed in the omapdss driver code instead, simplifying mach-omap2 code. We however can't remove the mach-omap2 code completely as the omap2fb driver still depends on it, but we can move it to the omap2fb-specific section, where it can stay until the omap2fb driver gets removed. This has the added benefit of not allowing DSS components to probe before the DSS itself, which led to runtime PM issues when the DSS probe is deferred. Fixes: 27d624527d99 ("drm/omap: dss: Acquire next dssdev at probe time") Signed-off-by: Laurent Pinchart <laurent.pinchart@ideasonboard.com> Acked-by: Tony Lindgren <tony@atomide.com> Reviewed-by: Sebastian Reichel <sebastian.reichel@collabora.com> Signed-off-by: Tomi Valkeinen <tomi.valkeinen@ti.com> Link: https://patchwork.freedesktop.org/patch/msgid/20181110111654.4387-2-laurent.pinchart@ideasonboard.com --- arch/arm/mach-omap2/display.c | 111 ++++++++++++++---------------- drivers/gpu/drm/omapdrm/dss/dss.c | 11 ++- 2 files changed, 63 insertions(+), 59 deletions(-) diff --git a/arch/arm/mach-omap2/display.c b/arch/arm/mach-omap2/display.c index 9500b6e273801..f86b72d1d59e5 100644 --- a/arch/arm/mach-omap2/display.c +++ b/arch/arm/mach-omap2/display.c @@ -209,11 +209,61 @@ static int __init omapdss_init_fbdev(void) return 0; } -#else -static inline int omapdss_init_fbdev(void) + +static const char * const omapdss_compat_names[] __initconst = { + "ti,omap2-dss", + "ti,omap3-dss", + "ti,omap4-dss", + "ti,omap5-dss", + "ti,dra7-dss", +}; + +static struct device_node * __init omapdss_find_dss_of_node(void) { - return 0; + struct device_node *node; + int i; + + for (i = 0; i < ARRAY_SIZE(omapdss_compat_names); ++i) { + node = of_find_compatible_node(NULL, NULL, + omapdss_compat_names[i]); + if (node) + return node; + } + + return NULL; } + +static int __init omapdss_init_of(void) +{ + int r; + struct device_node *node; + struct platform_device *pdev; + + /* only create dss helper devices if dss is enabled in the .dts */ + + node = omapdss_find_dss_of_node(); + if (!node) + return 0; + + if (!of_device_is_available(node)) + return 0; + + pdev = of_find_device_by_node(node); + + if (!pdev) { + pr_err("Unable to find DSS platform device\n"); + return -ENODEV; + } + + r = of_platform_populate(node, NULL, NULL, &pdev->dev); + if (r) { + pr_err("Unable to populate DSS submodule devices\n"); + return r; + } + + return omapdss_init_fbdev(); +} +omap_device_initcall(omapdss_init_of); #endif /* CONFIG_FB_OMAP2 */ static void dispc_disable_outputs(void) @@ -361,58 +411,3 @@ int omap_dss_reset(struct omap_hwmod *oh) return r; } - -static const char * const omapdss_compat_names[] __initconst = { - "ti,omap2-dss", - "ti,omap3-dss", - "ti,omap4-dss", - "ti,omap5-dss", - "ti,dra7-dss", -}; - -static struct device_node * __init omapdss_find_dss_of_node(void) -{ - struct device_node *node; - int i; - - for (i = 0; i < ARRAY_SIZE(omapdss_compat_names); ++i) { - node = of_find_compatible_node(NULL, NULL, - omapdss_compat_names[i]); - if (node) - return node; - } - - return NULL; -} - -static int __init omapdss_init_of(void) -{ - int r; - struct device_node *node; - struct platform_device *pdev; - - /* only create dss helper devices if dss is enabled in the .dts */ - - node = omapdss_find_dss_of_node(); - if (!node) - return 0; - - if (!of_device_is_available(node)) - return 0; - - pdev = of_find_device_by_node(node); - - if (!pdev) { - pr_err("Unable to find DSS platform device\n"); - return -ENODEV; - } - - r = of_platform_populate(node, NULL, NULL, &pdev->dev); - if (r) { - pr_err("Unable to populate DSS submodule devices\n"); - return r; - } - - return omapdss_init_fbdev(); -} -omap_device_initcall(omapdss_init_of); diff --git a/drivers/gpu/drm/omapdrm/dss/dss.c b/drivers/gpu/drm/omapdrm/dss/dss.c index 1aaf260aa9b86..7553c7fc1c457 100644 --- a/drivers/gpu/drm/omapdrm/dss/dss.c +++ b/drivers/gpu/drm/omapdrm/dss/dss.c @@ -1484,16 +1484,23 @@ static int dss_probe(struct platform_device *pdev) dss); /* Add all the child devices as components. */ + r = of_platform_populate(pdev->dev.of_node, NULL, NULL, &pdev->dev); + if (r) + goto err_uninit_debugfs; + omapdss_gather_components(&pdev->dev); device_for_each_child(&pdev->dev, &match, dss_add_child_component); r = component_master_add_with_match(&pdev->dev, &dss_component_ops, match); if (r) - goto err_uninit_debugfs; + goto err_of_depopulate; return 0; +err_of_depopulate: + of_platform_depopulate(&pdev->dev); + err_uninit_debugfs: dss_debugfs_remove_file(dss->debugfs.clk); dss_debugfs_remove_file(dss->debugfs.dss); @@ -1522,6 +1529,8 @@ static int dss_remove(struct platform_device *pdev) { struct dss_device *dss = platform_get_drvdata(pdev); + of_platform_depopulate(&pdev->dev); + component_master_del(&pdev->dev, &dss_component_ops); dss_debugfs_remove_file(dss->debugfs.clk); From f8523b64d2d2f94bb429c15166d7601d39243c4d Mon Sep 17 00:00:00 2001 From: Laurent Pinchart <laurent.pinchart@ideasonboard.com> Date: Sat, 10 Nov 2018 13:16:52 +0200 Subject: [PATCH 360/443] drm/omap: hdmi4: Ensure the device is active during bind The bind function performs hardware access (in hdmi4_cec_init()) and thus requires the device to be active. Ensure this by surrounding the bind function by hdmi_runtime_get() and hdmi_runtime_put() calls. Fixes: 27d624527d99 ("drm/omap: dss: Acquire next dssdev at probe time") Signed-off-by: Laurent Pinchart <laurent.pinchart@ideasonboard.com> Reviewed-by: Sebastian Reichel <sebastian.reichel@collabora.com> Signed-off-by: Tomi Valkeinen <tomi.valkeinen@ti.com> Link: https://patchwork.freedesktop.org/patch/msgid/20181110111654.4387-3-laurent.pinchart@ideasonboard.com --- drivers/gpu/drm/omapdrm/dss/hdmi4.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/omapdrm/dss/hdmi4.c b/drivers/gpu/drm/omapdrm/dss/hdmi4.c index cf6230eac31a3..073fa462930a2 100644 --- a/drivers/gpu/drm/omapdrm/dss/hdmi4.c +++ b/drivers/gpu/drm/omapdrm/dss/hdmi4.c @@ -635,10 +635,14 @@ static int hdmi4_bind(struct device *dev, struct device *master, void *data) hdmi->dss = dss; - r = hdmi_pll_init(dss, hdmi->pdev, &hdmi->pll, &hdmi->wp); + r = hdmi_runtime_get(hdmi); if (r) return r; + r = hdmi_pll_init(dss, hdmi->pdev, &hdmi->pll, &hdmi->wp); + if (r) + goto err_runtime_put; + r = hdmi4_cec_init(hdmi->pdev, &hdmi->core, &hdmi->wp); if (r) goto err_pll_uninit; @@ -652,12 +656,16 @@ static int hdmi4_bind(struct device *dev, struct device *master, void *data) hdmi->debugfs = dss_debugfs_create_file(dss, "hdmi", hdmi_dump_regs, hdmi); + hdmi_runtime_put(hdmi); + return 0; err_cec_uninit: hdmi4_cec_uninit(&hdmi->core); err_pll_uninit: hdmi_pll_uninit(&hdmi->pll); +err_runtime_put: + hdmi_runtime_put(hdmi); return r; } From 350c03e880038bf60184500bab9025d3361c0b0e Mon Sep 17 00:00:00 2001 From: Laurent Pinchart <laurent.pinchart@ideasonboard.com> Date: Sat, 10 Nov 2018 13:16:53 +0200 Subject: [PATCH 361/443] drm/omap: dsi: Ensure the device is active during probe The probe function performs hardware access to read the number of supported data lanes from a configuration register and thus requires the device to be active. Ensure this by surrounding the access with dsi_runtime_get() and dsi_runtime_put() calls. Fixes: edb715dffdee ("drm/omap: dss: dsi: Move initialization code from bind to probe") Signed-off-by: Laurent Pinchart <laurent.pinchart@ideasonboard.com> Acked-by: Tony Lindgren <tony@atomide.com> Reviewed-by: Sebastian Reichel <sebastian.reichel@collabora.com> Signed-off-by: Tomi Valkeinen <tomi.valkeinen@ti.com> Link: https://patchwork.freedesktop.org/patch/msgid/20181110111654.4387-4-laurent.pinchart@ideasonboard.com --- drivers/gpu/drm/omapdrm/dss/dsi.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/omapdrm/dss/dsi.c b/drivers/gpu/drm/omapdrm/dss/dsi.c index 394c129cfb3bb..b9d5ad7e67d8a 100644 --- a/drivers/gpu/drm/omapdrm/dss/dsi.c +++ b/drivers/gpu/drm/omapdrm/dss/dsi.c @@ -5409,11 +5409,14 @@ static int dsi_probe(struct platform_device *pdev) /* DSI on OMAP3 doesn't have register DSI_GNQ, set number * of data to 3 by default */ - if (dsi->data->quirks & DSI_QUIRK_GNQ) + if (dsi->data->quirks & DSI_QUIRK_GNQ) { + dsi_runtime_get(dsi); /* NB_DATA_LANES */ dsi->num_lanes_supported = 1 + REG_GET(dsi, DSI_GNQ, 11, 9); - else + dsi_runtime_put(dsi); + } else { dsi->num_lanes_supported = 3; + } r = dsi_init_output(dsi); if (r) From 24ec84e854c68ceda59a26027114eb7f260f9411 Mon Sep 17 00:00:00 2001 From: Laurent Pinchart <laurent.pinchart@ideasonboard.com> Date: Sat, 10 Nov 2018 13:16:54 +0200 Subject: [PATCH 362/443] drm/omap: Move DISPC runtime PM handling to omapdrm The internal encoders (DSI, HDMI4, HDMI5 and VENC) runtime PM handlers attempt to manage the runtime PM state of the connected DISPC, based on the rationale that the DISPC providing data to the encoders requires ensuring that the display is active whenever the encoders are active. While the DISPC provides data to the encoders, it doesn't as such constitute a resource that encoders require in order to be taken out of suspend, contrary to for instance a functional clock or a power supply. Encoders registers can be accessed without the DISPC being active, and while the encoders will not output any video stream without being fed by the DISPC, the DISPC PM state doesn't influence the encoders PM state. For this reason the DISPC PM state is better managed from the omapdrm driver, in the CRTC enable and disable operations. This allows the encoders PM state to be handled separately from the DISPC, and in particular at times when the DISPC may not be available (for instance at probe due to the DSS probe being deferred, or at remove time du to the DISPC being already removed). Fixes: edb715dffdee ("drm/omap: dss: dsi: Move initialization code from bind to probe") Signed-off-by: Laurent Pinchart <laurent.pinchart@ideasonboard.com> Reviewed-by: Sebastian Reichel <sebastian.reichel@collabora.com> Signed-off-by: Tomi Valkeinen <tomi.valkeinen@ti.com> Link: https://patchwork.freedesktop.org/patch/msgid/20181110111654.4387-5-laurent.pinchart@ideasonboard.com --- drivers/gpu/drm/omapdrm/dss/dsi.c | 7 ------- drivers/gpu/drm/omapdrm/dss/hdmi4.c | 27 --------------------------- drivers/gpu/drm/omapdrm/dss/hdmi5.c | 27 --------------------------- drivers/gpu/drm/omapdrm/dss/venc.c | 7 ------- drivers/gpu/drm/omapdrm/omap_crtc.c | 6 ++++++ 5 files changed, 6 insertions(+), 68 deletions(-) diff --git a/drivers/gpu/drm/omapdrm/dss/dsi.c b/drivers/gpu/drm/omapdrm/dss/dsi.c index b9d5ad7e67d8a..36123c086d97b 100644 --- a/drivers/gpu/drm/omapdrm/dss/dsi.c +++ b/drivers/gpu/drm/omapdrm/dss/dsi.c @@ -5473,19 +5473,12 @@ static int dsi_runtime_suspend(struct device *dev) /* wait for current handler to finish before turning the DSI off */ synchronize_irq(dsi->irq); - dispc_runtime_put(dsi->dss->dispc); - return 0; } static int dsi_runtime_resume(struct device *dev) { struct dsi_data *dsi = dev_get_drvdata(dev); - int r; - - r = dispc_runtime_get(dsi->dss->dispc); - if (r) - return r; dsi->is_enabled = true; /* ensure the irq handler sees the is_enabled value */ diff --git a/drivers/gpu/drm/omapdrm/dss/hdmi4.c b/drivers/gpu/drm/omapdrm/dss/hdmi4.c index 073fa462930a2..aabdda394c9c6 100644 --- a/drivers/gpu/drm/omapdrm/dss/hdmi4.c +++ b/drivers/gpu/drm/omapdrm/dss/hdmi4.c @@ -841,32 +841,6 @@ static int hdmi4_remove(struct platform_device *pdev) return 0; } -static int hdmi_runtime_suspend(struct device *dev) -{ - struct omap_hdmi *hdmi = dev_get_drvdata(dev); - - dispc_runtime_put(hdmi->dss->dispc); - - return 0; -} - -static int hdmi_runtime_resume(struct device *dev) -{ - struct omap_hdmi *hdmi = dev_get_drvdata(dev); - int r; - - r = dispc_runtime_get(hdmi->dss->dispc); - if (r < 0) - return r; - - return 0; -} - -static const struct dev_pm_ops hdmi_pm_ops = { - .runtime_suspend = hdmi_runtime_suspend, - .runtime_resume = hdmi_runtime_resume, -}; - static const struct of_device_id hdmi_of_match[] = { { .compatible = "ti,omap4-hdmi", }, {}, @@ -877,7 +851,6 @@ struct platform_driver omapdss_hdmi4hw_driver = { .remove = hdmi4_remove, .driver = { .name = "omapdss_hdmi", - .pm = &hdmi_pm_ops, .of_match_table = hdmi_of_match, .suppress_bind_attrs = true, }, diff --git a/drivers/gpu/drm/omapdrm/dss/hdmi5.c b/drivers/gpu/drm/omapdrm/dss/hdmi5.c index b0e4a7463f8c8..9e8556f67a291 100644 --- a/drivers/gpu/drm/omapdrm/dss/hdmi5.c +++ b/drivers/gpu/drm/omapdrm/dss/hdmi5.c @@ -825,32 +825,6 @@ static int hdmi5_remove(struct platform_device *pdev) return 0; } -static int hdmi_runtime_suspend(struct device *dev) -{ - struct omap_hdmi *hdmi = dev_get_drvdata(dev); - - dispc_runtime_put(hdmi->dss->dispc); - - return 0; -} - -static int hdmi_runtime_resume(struct device *dev) -{ - struct omap_hdmi *hdmi = dev_get_drvdata(dev); - int r; - - r = dispc_runtime_get(hdmi->dss->dispc); - if (r < 0) - return r; - - return 0; -} - -static const struct dev_pm_ops hdmi_pm_ops = { - .runtime_suspend = hdmi_runtime_suspend, - .runtime_resume = hdmi_runtime_resume, -}; - static const struct of_device_id hdmi_of_match[] = { { .compatible = "ti,omap5-hdmi", }, { .compatible = "ti,dra7-hdmi", }, @@ -862,7 +836,6 @@ struct platform_driver omapdss_hdmi5hw_driver = { .remove = hdmi5_remove, .driver = { .name = "omapdss_hdmi5", - .pm = &hdmi_pm_ops, .of_match_table = hdmi_of_match, .suppress_bind_attrs = true, }, diff --git a/drivers/gpu/drm/omapdrm/dss/venc.c b/drivers/gpu/drm/omapdrm/dss/venc.c index ff0b18c8e4ace..b5f52727f8b17 100644 --- a/drivers/gpu/drm/omapdrm/dss/venc.c +++ b/drivers/gpu/drm/omapdrm/dss/venc.c @@ -946,19 +946,12 @@ static int venc_runtime_suspend(struct device *dev) if (venc->tv_dac_clk) clk_disable_unprepare(venc->tv_dac_clk); - dispc_runtime_put(venc->dss->dispc); - return 0; } static int venc_runtime_resume(struct device *dev) { struct venc_device *venc = dev_get_drvdata(dev); - int r; - - r = dispc_runtime_get(venc->dss->dispc); - if (r < 0) - return r; if (venc->tv_dac_clk) clk_prepare_enable(venc->tv_dac_clk); diff --git a/drivers/gpu/drm/omapdrm/omap_crtc.c b/drivers/gpu/drm/omapdrm/omap_crtc.c index 62928ec0e7db7..caffc547ef97e 100644 --- a/drivers/gpu/drm/omapdrm/omap_crtc.c +++ b/drivers/gpu/drm/omapdrm/omap_crtc.c @@ -350,11 +350,14 @@ static void omap_crtc_arm_event(struct drm_crtc *crtc) static void omap_crtc_atomic_enable(struct drm_crtc *crtc, struct drm_crtc_state *old_state) { + struct omap_drm_private *priv = crtc->dev->dev_private; struct omap_crtc *omap_crtc = to_omap_crtc(crtc); int ret; DBG("%s", omap_crtc->name); + priv->dispc_ops->runtime_get(priv->dispc); + spin_lock_irq(&crtc->dev->event_lock); drm_crtc_vblank_on(crtc); ret = drm_crtc_vblank_get(crtc); @@ -367,6 +370,7 @@ static void omap_crtc_atomic_enable(struct drm_crtc *crtc, static void omap_crtc_atomic_disable(struct drm_crtc *crtc, struct drm_crtc_state *old_state) { + struct omap_drm_private *priv = crtc->dev->dev_private; struct omap_crtc *omap_crtc = to_omap_crtc(crtc); DBG("%s", omap_crtc->name); @@ -379,6 +383,8 @@ static void omap_crtc_atomic_disable(struct drm_crtc *crtc, spin_unlock_irq(&crtc->dev->event_lock); drm_crtc_vblank_off(crtc); + + priv->dispc_ops->runtime_put(priv->dispc); } static enum drm_mode_status omap_crtc_mode_valid(struct drm_crtc *crtc, From cbed7545db7ae5907d7dc9d4002717d46cae29e9 Mon Sep 17 00:00:00 2001 From: Tony Lindgren <tony@atomide.com> Date: Tue, 6 Nov 2018 07:28:02 -0800 Subject: [PATCH 363/443] drm/omap: dsi: Fix missing of_platform_depopulate() We're missing a call to of_platform_depopulate() on errors for dsi. Looks like dss is already doing this. Signed-off-by: Tony Lindgren <tony@atomide.com> Reviewed-by: Laurent Pinchart <laurent.pinchart@ideasonboard.com> Reviewed-by: Sebastian Reichel <sebastian.reichel@collabora.com> Signed-off-by: Tomi Valkeinen <tomi.valkeinen@ti.com> Link: https://patchwork.freedesktop.org/patch/msgid/20181106152802.38599-1-tony@atomide.com --- drivers/gpu/drm/omapdrm/dss/dsi.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/omapdrm/dss/dsi.c b/drivers/gpu/drm/omapdrm/dss/dsi.c index 36123c086d97b..0a485c5b982eb 100644 --- a/drivers/gpu/drm/omapdrm/dss/dsi.c +++ b/drivers/gpu/drm/omapdrm/dss/dsi.c @@ -5429,15 +5429,19 @@ static int dsi_probe(struct platform_device *pdev) } r = of_platform_populate(dev->of_node, NULL, NULL, dev); - if (r) + if (r) { DSSERR("Failed to populate DSI child devices: %d\n", r); + goto err_uninit_output; + } r = component_add(&pdev->dev, &dsi_component_ops); if (r) - goto err_uninit_output; + goto err_of_depopulate; return 0; +err_of_depopulate: + of_platform_depopulate(dev); err_uninit_output: dsi_uninit_output(dsi); err_pm_disable: From 899a42f836678a595f7d2bc36a5a0c2b03d08cbc Mon Sep 17 00:00:00 2001 From: Russell King <rmk+kernel@armlinux.org.uk> Date: Thu, 19 Jul 2018 11:42:36 +0100 Subject: [PATCH 364/443] ARM: make lookup_processor_type() non-__init Move lookup_processor_type() out of the __init section so it is callable from (eg) the secondary startup code during hotplug. Reviewed-by: Julien Thierry <julien.thierry@arm.com> Signed-off-by: Russell King <rmk+kernel@armlinux.org.uk> --- arch/arm/kernel/head-common.S | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/arm/kernel/head-common.S b/arch/arm/kernel/head-common.S index 6e0375e7db055..997b02302c314 100644 --- a/arch/arm/kernel/head-common.S +++ b/arch/arm/kernel/head-common.S @@ -145,6 +145,9 @@ __mmap_switched_data: #endif .size __mmap_switched_data, . - __mmap_switched_data + __FINIT + .text + /* * This provides a C-API version of __lookup_processor_type */ @@ -156,9 +159,6 @@ ENTRY(lookup_processor_type) ldmfd sp!, {r4 - r6, r9, pc} ENDPROC(lookup_processor_type) - __FINIT - .text - /* * Read processor ID register (CP#15, CR0), and look up in the linker-built * supported processor list. Note that we can't use the absolute addresses From 65987a8553061515b5851b472081aedb9837a391 Mon Sep 17 00:00:00 2001 From: Russell King <rmk+kernel@armlinux.org.uk> Date: Thu, 19 Jul 2018 11:59:56 +0100 Subject: [PATCH 365/443] ARM: split out processor lookup Split out the lookup of the processor type and associated error handling from the rest of setup_processor() - we will need to use this in the secondary CPU bringup path for big.Little Spectre variant 2 mitigation. Reviewed-by: Julien Thierry <julien.thierry@arm.com> Signed-off-by: Russell King <rmk+kernel@armlinux.org.uk> --- arch/arm/include/asm/cputype.h | 1 + arch/arm/kernel/setup.c | 31 +++++++++++++++++++------------ 2 files changed, 20 insertions(+), 12 deletions(-) diff --git a/arch/arm/include/asm/cputype.h b/arch/arm/include/asm/cputype.h index 26021980504db..f6df4bb4e543f 100644 --- a/arch/arm/include/asm/cputype.h +++ b/arch/arm/include/asm/cputype.h @@ -107,6 +107,7 @@ #define ARM_CPU_PART_SCORPION 0x510002d0 extern unsigned int processor_id; +struct proc_info_list *lookup_processor(u32 midr); #ifdef CONFIG_CPU_CP15 #define read_cpuid(reg) \ diff --git a/arch/arm/kernel/setup.c b/arch/arm/kernel/setup.c index fc40a2b40595b..05a4eb6b0d01b 100644 --- a/arch/arm/kernel/setup.c +++ b/arch/arm/kernel/setup.c @@ -667,22 +667,29 @@ static void __init smp_build_mpidr_hash(void) } #endif -static void __init setup_processor(void) +/* + * locate processor in the list of supported processor types. The linker + * builds this table for us from the entries in arch/arm/mm/proc-*.S + */ +struct proc_info_list *lookup_processor(u32 midr) { - struct proc_info_list *list; + struct proc_info_list *list = lookup_processor_type(midr); - /* - * locate processor in the list of supported processor - * types. The linker builds this table for us from the - * entries in arch/arm/mm/proc-*.S - */ - list = lookup_processor_type(read_cpuid_id()); if (!list) { - pr_err("CPU configuration botched (ID %08x), unable to continue.\n", - read_cpuid_id()); - while (1); + pr_err("CPU%u: configuration botched (ID %08x), CPU halted\n", + smp_processor_id(), midr); + while (1) + /* can't use cpu_relax() here as it may require MMU setup */; } + return list; +} + +static void __init setup_processor(void) +{ + unsigned int midr = read_cpuid_id(); + struct proc_info_list *list = lookup_processor(midr); + cpu_name = list->cpu_name; __cpu_architecture = __get_cpu_architecture(); @@ -700,7 +707,7 @@ static void __init setup_processor(void) #endif pr_info("CPU: %s [%08x] revision %d (ARMv%s), cr=%08lx\n", - cpu_name, read_cpuid_id(), read_cpuid_id() & 15, + list->cpu_name, midr, midr & 15, proc_arch[cpu_architecture()], get_cr()); snprintf(init_utsname()->machine, __NEW_UTS_LEN + 1, "%s%c", From 945aceb1db8885d3a35790cf2e810f681db52756 Mon Sep 17 00:00:00 2001 From: Russell King <rmk+kernel@armlinux.org.uk> Date: Thu, 19 Jul 2018 12:43:03 +0100 Subject: [PATCH 366/443] ARM: clean up per-processor check_bugs method call Call the per-processor type check_bugs() method in the same way as we do other per-processor functions - move the "processor." detail into proc-fns.h. Reviewed-by: Julien Thierry <julien.thierry@arm.com> Signed-off-by: Russell King <rmk+kernel@armlinux.org.uk> --- arch/arm/include/asm/proc-fns.h | 1 + arch/arm/kernel/bugs.c | 4 ++-- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/arch/arm/include/asm/proc-fns.h b/arch/arm/include/asm/proc-fns.h index e25f4392e1b28..30c499146320d 100644 --- a/arch/arm/include/asm/proc-fns.h +++ b/arch/arm/include/asm/proc-fns.h @@ -99,6 +99,7 @@ extern void cpu_do_suspend(void *); extern void cpu_do_resume(void *); #else #define cpu_proc_init processor._proc_init +#define cpu_check_bugs processor.check_bugs #define cpu_proc_fin processor._proc_fin #define cpu_reset processor.reset #define cpu_do_idle processor._do_idle diff --git a/arch/arm/kernel/bugs.c b/arch/arm/kernel/bugs.c index 7be5113101915..d41d3598e5e54 100644 --- a/arch/arm/kernel/bugs.c +++ b/arch/arm/kernel/bugs.c @@ -6,8 +6,8 @@ void check_other_bugs(void) { #ifdef MULTI_CPU - if (processor.check_bugs) - processor.check_bugs(); + if (cpu_check_bugs) + cpu_check_bugs(); #endif } From e209950fdd065d2cc46e6338e47e52841b830cba Mon Sep 17 00:00:00 2001 From: Russell King <rmk+kernel@armlinux.org.uk> Date: Thu, 19 Jul 2018 12:17:38 +0100 Subject: [PATCH 367/443] ARM: add PROC_VTABLE and PROC_TABLE macros Allow the way we access members of the processor vtable to be changed at compile time. We will need to move to per-CPU vtables to fix the Spectre variant 2 issues on big.Little systems. However, we have a couple of calls that do not need the vtable treatment, and indeed cause a kernel warning due to the (later) use of smp_processor_id(), so also introduce the PROC_TABLE macro for these which always use CPU 0's function pointers. Reviewed-by: Julien Thierry <julien.thierry@arm.com> Signed-off-by: Russell King <rmk+kernel@armlinux.org.uk> --- arch/arm/include/asm/proc-fns.h | 39 ++++++++++++++++++++++----------- arch/arm/kernel/setup.c | 4 +--- 2 files changed, 27 insertions(+), 16 deletions(-) diff --git a/arch/arm/include/asm/proc-fns.h b/arch/arm/include/asm/proc-fns.h index 30c499146320d..c259cc49c6415 100644 --- a/arch/arm/include/asm/proc-fns.h +++ b/arch/arm/include/asm/proc-fns.h @@ -23,7 +23,7 @@ struct mm_struct; /* * Don't change this structure - ASM code relies on it. */ -extern struct processor { +struct processor { /* MISC * get data abort address/flags */ @@ -79,9 +79,13 @@ extern struct processor { unsigned int suspend_size; void (*do_suspend)(void *); void (*do_resume)(void *); -} processor; +}; #ifndef MULTI_CPU +static inline void init_proc_vtable(const struct processor *p) +{ +} + extern void cpu_proc_init(void); extern void cpu_proc_fin(void); extern int cpu_do_idle(void); @@ -98,18 +102,27 @@ extern void cpu_reset(unsigned long addr, bool hvc) __attribute__((noreturn)); extern void cpu_do_suspend(void *); extern void cpu_do_resume(void *); #else -#define cpu_proc_init processor._proc_init -#define cpu_check_bugs processor.check_bugs -#define cpu_proc_fin processor._proc_fin -#define cpu_reset processor.reset -#define cpu_do_idle processor._do_idle -#define cpu_dcache_clean_area processor.dcache_clean_area -#define cpu_set_pte_ext processor.set_pte_ext -#define cpu_do_switch_mm processor.switch_mm -/* These three are private to arch/arm/kernel/suspend.c */ -#define cpu_do_suspend processor.do_suspend -#define cpu_do_resume processor.do_resume +extern struct processor processor; +#define PROC_VTABLE(f) processor.f +#define PROC_TABLE(f) processor.f +static inline void init_proc_vtable(const struct processor *p) +{ + processor = *p; +} + +#define cpu_proc_init PROC_VTABLE(_proc_init) +#define cpu_check_bugs PROC_VTABLE(check_bugs) +#define cpu_proc_fin PROC_VTABLE(_proc_fin) +#define cpu_reset PROC_VTABLE(reset) +#define cpu_do_idle PROC_VTABLE(_do_idle) +#define cpu_dcache_clean_area PROC_TABLE(dcache_clean_area) +#define cpu_set_pte_ext PROC_TABLE(set_pte_ext) +#define cpu_do_switch_mm PROC_VTABLE(switch_mm) + +/* These two are private to arch/arm/kernel/suspend.c */ +#define cpu_do_suspend PROC_VTABLE(do_suspend) +#define cpu_do_resume PROC_VTABLE(do_resume) #endif extern void cpu_resume(void); diff --git a/arch/arm/kernel/setup.c b/arch/arm/kernel/setup.c index 05a4eb6b0d01b..c214bd14a1fe3 100644 --- a/arch/arm/kernel/setup.c +++ b/arch/arm/kernel/setup.c @@ -693,9 +693,7 @@ static void __init setup_processor(void) cpu_name = list->cpu_name; __cpu_architecture = __get_cpu_architecture(); -#ifdef MULTI_CPU - processor = *list->proc; -#endif + init_proc_vtable(list->proc); #ifdef MULTI_TLB cpu_tlb = *list->tlb; #endif From 5df7a99bdd0de4a0480320264c44c04543c29d5a Mon Sep 17 00:00:00 2001 From: Julien Thierry <julien.thierry@arm.com> Date: Thu, 8 Nov 2018 17:25:28 +0100 Subject: [PATCH 368/443] ARM: 8810/1: vfp: Fix wrong assignement to ufp_exc In vfp_preserve_user_clear_hwstate, ufp_exc->fpinst2 gets assigned to itself. It should actually be hwstate->fpinst2 that gets assigned to the ufp_exc field. Fixes commit 3aa2df6ec2ca6bc143a65351cca4266d03a8bc41 ("ARM: 8791/1: vfp: use __copy_to_user() when saving VFP state"). Reported-by: David Binderman <dcb314@hotmail.com> Signed-off-by: Julien Thierry <julien.thierry@arm.com> Signed-off-by: Russell King <rmk+kernel@armlinux.org.uk> --- arch/arm/vfp/vfpmodule.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/vfp/vfpmodule.c b/arch/arm/vfp/vfpmodule.c index 3b75f1d8a491a..15bc3cf2a7fdc 100644 --- a/arch/arm/vfp/vfpmodule.c +++ b/arch/arm/vfp/vfpmodule.c @@ -579,7 +579,7 @@ int vfp_preserve_user_clear_hwstate(struct user_vfp *ufp, */ ufp_exc->fpexc = hwstate->fpexc; ufp_exc->fpinst = hwstate->fpinst; - ufp_exc->fpinst2 = ufp_exc->fpinst2; + ufp_exc->fpinst2 = hwstate->fpinst2; /* Ensure that VFP is disabled. */ vfp_flush_hwstate(thread); From 383fb3ee8024d596f488d2dbaf45e572897acbdb Mon Sep 17 00:00:00 2001 From: Russell King <rmk+kernel@armlinux.org.uk> Date: Thu, 19 Jul 2018 12:21:31 +0100 Subject: [PATCH 369/443] ARM: spectre-v2: per-CPU vtables to work around big.Little systems In big.Little systems, some CPUs require the Spectre workarounds in paths such as the context switch, but other CPUs do not. In order to handle these differences, we need per-CPU vtables. We are unable to use the kernel's per-CPU variables to support this as per-CPU is not initialised at times when we need access to the vtables, so we have to use an array indexed by logical CPU number. We use an array-of-pointers to avoid having function pointers in the kernel's read/write .data section. Reviewed-by: Julien Thierry <julien.thierry@arm.com> Signed-off-by: Russell King <rmk+kernel@armlinux.org.uk> --- arch/arm/include/asm/proc-fns.h | 23 +++++++++++++++++++++++ arch/arm/kernel/setup.c | 5 +++++ arch/arm/kernel/smp.c | 31 +++++++++++++++++++++++++++++++ arch/arm/mm/proc-v7-bugs.c | 17 ++--------------- 4 files changed, 61 insertions(+), 15 deletions(-) diff --git a/arch/arm/include/asm/proc-fns.h b/arch/arm/include/asm/proc-fns.h index c259cc49c6415..e1b6f280ab088 100644 --- a/arch/arm/include/asm/proc-fns.h +++ b/arch/arm/include/asm/proc-fns.h @@ -104,12 +104,35 @@ extern void cpu_do_resume(void *); #else extern struct processor processor; +#if defined(CONFIG_BIG_LITTLE) && defined(CONFIG_HARDEN_BRANCH_PREDICTOR) +#include <linux/smp.h> +/* + * This can't be a per-cpu variable because we need to access it before + * per-cpu has been initialised. We have a couple of functions that are + * called in a pre-emptible context, and so can't use smp_processor_id() + * there, hence PROC_TABLE(). We insist in init_proc_vtable() that the + * function pointers for these are identical across all CPUs. + */ +extern struct processor *cpu_vtable[]; +#define PROC_VTABLE(f) cpu_vtable[smp_processor_id()]->f +#define PROC_TABLE(f) cpu_vtable[0]->f +static inline void init_proc_vtable(const struct processor *p) +{ + unsigned int cpu = smp_processor_id(); + *cpu_vtable[cpu] = *p; + WARN_ON_ONCE(cpu_vtable[cpu]->dcache_clean_area != + cpu_vtable[0]->dcache_clean_area); + WARN_ON_ONCE(cpu_vtable[cpu]->set_pte_ext != + cpu_vtable[0]->set_pte_ext); +} +#else #define PROC_VTABLE(f) processor.f #define PROC_TABLE(f) processor.f static inline void init_proc_vtable(const struct processor *p) { processor = *p; } +#endif #define cpu_proc_init PROC_VTABLE(_proc_init) #define cpu_check_bugs PROC_VTABLE(check_bugs) diff --git a/arch/arm/kernel/setup.c b/arch/arm/kernel/setup.c index c214bd14a1fe3..cd46a595422cf 100644 --- a/arch/arm/kernel/setup.c +++ b/arch/arm/kernel/setup.c @@ -115,6 +115,11 @@ EXPORT_SYMBOL(elf_hwcap2); #ifdef MULTI_CPU struct processor processor __ro_after_init; +#if defined(CONFIG_BIG_LITTLE) && defined(CONFIG_HARDEN_BRANCH_PREDICTOR) +struct processor *cpu_vtable[NR_CPUS] = { + [0] = &processor, +}; +#endif #endif #ifdef MULTI_TLB struct cpu_tlb_fns cpu_tlb __ro_after_init; diff --git a/arch/arm/kernel/smp.c b/arch/arm/kernel/smp.c index 5ad0b67b9e334..82b879db32ee4 100644 --- a/arch/arm/kernel/smp.c +++ b/arch/arm/kernel/smp.c @@ -42,6 +42,7 @@ #include <asm/mmu_context.h> #include <asm/pgtable.h> #include <asm/pgalloc.h> +#include <asm/procinfo.h> #include <asm/processor.h> #include <asm/sections.h> #include <asm/tlbflush.h> @@ -102,6 +103,30 @@ static unsigned long get_arch_pgd(pgd_t *pgd) #endif } +#if defined(CONFIG_BIG_LITTLE) && defined(CONFIG_HARDEN_BRANCH_PREDICTOR) +static int secondary_biglittle_prepare(unsigned int cpu) +{ + if (!cpu_vtable[cpu]) + cpu_vtable[cpu] = kzalloc(sizeof(*cpu_vtable[cpu]), GFP_KERNEL); + + return cpu_vtable[cpu] ? 0 : -ENOMEM; +} + +static void secondary_biglittle_init(void) +{ + init_proc_vtable(lookup_processor(read_cpuid_id())->proc); +} +#else +static int secondary_biglittle_prepare(unsigned int cpu) +{ + return 0; +} + +static void secondary_biglittle_init(void) +{ +} +#endif + int __cpu_up(unsigned int cpu, struct task_struct *idle) { int ret; @@ -109,6 +134,10 @@ int __cpu_up(unsigned int cpu, struct task_struct *idle) if (!smp_ops.smp_boot_secondary) return -ENOSYS; + ret = secondary_biglittle_prepare(cpu); + if (ret) + return ret; + /* * We need to tell the secondary core where to find * its stack and the page tables. @@ -360,6 +389,8 @@ asmlinkage void secondary_start_kernel(void) struct mm_struct *mm = &init_mm; unsigned int cpu; + secondary_biglittle_init(); + /* * The identity mapping is uncached (strongly ordered), so * switch away from it before attempting any exclusive accesses. diff --git a/arch/arm/mm/proc-v7-bugs.c b/arch/arm/mm/proc-v7-bugs.c index 5544b82a2e7a5..9a07916af8dd2 100644 --- a/arch/arm/mm/proc-v7-bugs.c +++ b/arch/arm/mm/proc-v7-bugs.c @@ -52,8 +52,6 @@ static void cpu_v7_spectre_init(void) case ARM_CPU_PART_CORTEX_A17: case ARM_CPU_PART_CORTEX_A73: case ARM_CPU_PART_CORTEX_A75: - if (processor.switch_mm != cpu_v7_bpiall_switch_mm) - goto bl_error; per_cpu(harden_branch_predictor_fn, cpu) = harden_branch_predictor_bpiall; spectre_v2_method = "BPIALL"; @@ -61,8 +59,6 @@ static void cpu_v7_spectre_init(void) case ARM_CPU_PART_CORTEX_A15: case ARM_CPU_PART_BRAHMA_B15: - if (processor.switch_mm != cpu_v7_iciallu_switch_mm) - goto bl_error; per_cpu(harden_branch_predictor_fn, cpu) = harden_branch_predictor_iciallu; spectre_v2_method = "ICIALLU"; @@ -88,11 +84,9 @@ static void cpu_v7_spectre_init(void) ARM_SMCCC_ARCH_WORKAROUND_1, &res); if ((int)res.a0 != 0) break; - if (processor.switch_mm != cpu_v7_hvc_switch_mm && cpu) - goto bl_error; per_cpu(harden_branch_predictor_fn, cpu) = call_hvc_arch_workaround_1; - processor.switch_mm = cpu_v7_hvc_switch_mm; + cpu_do_switch_mm = cpu_v7_hvc_switch_mm; spectre_v2_method = "hypervisor"; break; @@ -101,11 +95,9 @@ static void cpu_v7_spectre_init(void) ARM_SMCCC_ARCH_WORKAROUND_1, &res); if ((int)res.a0 != 0) break; - if (processor.switch_mm != cpu_v7_smc_switch_mm && cpu) - goto bl_error; per_cpu(harden_branch_predictor_fn, cpu) = call_smc_arch_workaround_1; - processor.switch_mm = cpu_v7_smc_switch_mm; + cpu_do_switch_mm = cpu_v7_smc_switch_mm; spectre_v2_method = "firmware"; break; @@ -119,11 +111,6 @@ static void cpu_v7_spectre_init(void) if (spectre_v2_method) pr_info("CPU%u: Spectre v2: using %s workaround\n", smp_processor_id(), spectre_v2_method); - return; - -bl_error: - pr_err("CPU%u: Spectre v2: incorrect context switching function, system vulnerable\n", - cpu); } #else static void cpu_v7_spectre_init(void) From fb5bbae9b1333d44023713946fdd28db0cd85751 Mon Sep 17 00:00:00 2001 From: Chris Wilson <chris@chris-wilson.co.uk> Date: Mon, 5 Nov 2018 09:43:05 +0000 Subject: [PATCH 370/443] drm/i915/ringbuffer: Delay after EMIT_INVALIDATE for gen4/gen5 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Exercising the gpu reloc path strenuously revealed an issue where the updated relocations (from MI_STORE_DWORD_IMM) were not being observed upon execution. After some experiments with adding pipecontrols (a lot of pipecontrols (32) as gen4/5 do not have a bit to wait on earlier pipe controls or even the current on), it was discovered that we merely needed to delay the EMIT_INVALIDATE by several flushes. It is important to note that it is the EMIT_INVALIDATE as opposed to the EMIT_FLUSH that needs the delay as opposed to what one might first expect -- that the delay is required for the TLB invalidation to take effect (one presumes to purge any CS buffers) as opposed to a delay after flushing to ensure the writes have landed before triggering invalidation. Testcase: igt/gem_tiled_fence_blits Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> Cc: stable@vger.kernel.org Reviewed-by: Ville Syrjälä <ville.syrjala@linux.intel.com> Link: https://patchwork.freedesktop.org/patch/msgid/20181105094305.5767-1-chris@chris-wilson.co.uk (cherry picked from commit 55f99bf2a9c331838c981694bc872cd1ec4070b2) Signed-off-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com> --- drivers/gpu/drm/i915/intel_ringbuffer.c | 38 +++++++++++++++++++++++-- 1 file changed, 36 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index d0ef50bf930ad..187bb0ceb4ac4 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -91,6 +91,7 @@ static int gen4_render_ring_flush(struct i915_request *rq, u32 mode) { u32 cmd, *cs; + int i; /* * read/write caches: @@ -127,12 +128,45 @@ gen4_render_ring_flush(struct i915_request *rq, u32 mode) cmd |= MI_INVALIDATE_ISP; } - cs = intel_ring_begin(rq, 2); + i = 2; + if (mode & EMIT_INVALIDATE) + i += 20; + + cs = intel_ring_begin(rq, i); if (IS_ERR(cs)) return PTR_ERR(cs); *cs++ = cmd; - *cs++ = MI_NOOP; + + /* + * A random delay to let the CS invalidate take effect? Without this + * delay, the GPU relocation path fails as the CS does not see + * the updated contents. Just as important, if we apply the flushes + * to the EMIT_FLUSH branch (i.e. immediately after the relocation + * write and before the invalidate on the next batch), the relocations + * still fail. This implies that is a delay following invalidation + * that is required to reset the caches as opposed to a delay to + * ensure the memory is written. + */ + if (mode & EMIT_INVALIDATE) { + *cs++ = GFX_OP_PIPE_CONTROL(4) | PIPE_CONTROL_QW_WRITE; + *cs++ = i915_ggtt_offset(rq->engine->scratch) | + PIPE_CONTROL_GLOBAL_GTT; + *cs++ = 0; + *cs++ = 0; + + for (i = 0; i < 12; i++) + *cs++ = MI_FLUSH; + + *cs++ = GFX_OP_PIPE_CONTROL(4) | PIPE_CONTROL_QW_WRITE; + *cs++ = i915_ggtt_offset(rq->engine->scratch) | + PIPE_CONTROL_GLOBAL_GTT; + *cs++ = 0; + *cs++ = 0; + } + + *cs++ = cmd; + intel_ring_advance(rq, cs); return 0; From 7c4512300cfa5a4dcc8c1c52ae61e3fa4bd11a39 Mon Sep 17 00:00:00 2001 From: Lyude Paul <lyude@redhat.com> Date: Tue, 6 Nov 2018 16:30:12 -0500 Subject: [PATCH 371/443] drm/i915: Fix possible race in intel_dp_add_mst_connector() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This hasn't caused any issues yet that I'm aware of, but as Ville Syrjälä pointed out - we need to make sure that intel_connector->mst_port is set before initializing MST connectors, since in theory we could potentially check intel_connector->mst_port in i915_hpd_poll_init_work() after registering the connector but before having written it's value. Signed-off-by: Lyude Paul <lyude@redhat.com> Reviewed-by: Ville Syrjälä <ville.syrjala@linux.intel.com> Cc: Rodrigo Vivi <rodrigo.vivi@intel.com> Cc: stable@vger.kernel.org Link: https://patchwork.freedesktop.org/patch/msgid/20181106213017.14563-2-lyude@redhat.com (cherry picked from commit 66a5ab1034be801630816d1fa6cfc30db1a2f0b0) Signed-off-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com> --- drivers/gpu/drm/i915/intel_dp_mst.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_dp_mst.c b/drivers/gpu/drm/i915/intel_dp_mst.c index 1b00f8ea145ba..a911691dbd0fd 100644 --- a/drivers/gpu/drm/i915/intel_dp_mst.c +++ b/drivers/gpu/drm/i915/intel_dp_mst.c @@ -452,6 +452,10 @@ static struct drm_connector *intel_dp_add_mst_connector(struct drm_dp_mst_topolo if (!intel_connector) return NULL; + intel_connector->get_hw_state = intel_dp_mst_get_hw_state; + intel_connector->mst_port = intel_dp; + intel_connector->port = port; + connector = &intel_connector->base; ret = drm_connector_init(dev, connector, &intel_dp_mst_connector_funcs, DRM_MODE_CONNECTOR_DisplayPort); @@ -462,10 +466,6 @@ static struct drm_connector *intel_dp_add_mst_connector(struct drm_dp_mst_topolo drm_connector_helper_add(connector, &intel_dp_mst_connector_helper_funcs); - intel_connector->get_hw_state = intel_dp_mst_get_hw_state; - intel_connector->mst_port = intel_dp; - intel_connector->port = port; - for_each_pipe(dev_priv, pipe) { struct drm_encoder *enc = &intel_dp->mst_encoders[pipe]->base.base; From 541ff7e96c13cd5d67f6021d233f8e1c3df49278 Mon Sep 17 00:00:00 2001 From: Lyude Paul <lyude@redhat.com> Date: Tue, 6 Nov 2018 16:30:13 -0500 Subject: [PATCH 372/443] drm/i915: Fix NULL deref when re-enabling HPD IRQs on systems with MST MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Turns out that if you trigger an HPD storm on a system that has an MST topology connected to it, you'll end up causing the kernel to eventually hit a NULL deref: [ 332.339041] BUG: unable to handle kernel NULL pointer dereference at 00000000000000ec [ 332.340906] PGD 0 P4D 0 [ 332.342750] Oops: 0000 [#1] SMP PTI [ 332.344579] CPU: 2 PID: 25 Comm: kworker/2:0 Kdump: loaded Tainted: G O 4.18.0-rc3short-hpd-storm+ #2 [ 332.346453] Hardware name: LENOVO 20BWS1KY00/20BWS1KY00, BIOS JBET71WW (1.35 ) 09/14/2018 [ 332.348361] Workqueue: events intel_hpd_irq_storm_reenable_work [i915] [ 332.350301] RIP: 0010:intel_hpd_irq_storm_reenable_work.cold.3+0x2f/0x86 [i915] [ 332.352213] Code: 00 00 ba e8 00 00 00 48 c7 c6 c0 aa 5f a0 48 c7 c7 d0 73 62 a0 4c 89 c1 4c 89 04 24 e8 7f f5 af e0 4c 8b 04 24 44 89 f8 29 e8 <41> 39 80 ec 00 00 00 0f 85 43 13 fc ff 41 0f b6 86 b8 04 00 00 41 [ 332.354286] RSP: 0018:ffffc90000147e48 EFLAGS: 00010006 [ 332.356344] RAX: 0000000000000005 RBX: ffff8802c226c9d4 RCX: 0000000000000006 [ 332.358404] RDX: 0000000000000000 RSI: 0000000000000082 RDI: ffff88032dc95570 [ 332.360466] RBP: 0000000000000005 R08: 0000000000000000 R09: ffff88031b3dc840 [ 332.362528] R10: 0000000000000000 R11: 000000031a069602 R12: ffff8802c226ca20 [ 332.364575] R13: ffff8802c2268000 R14: ffff880310661000 R15: 000000000000000a [ 332.366615] FS: 0000000000000000(0000) GS:ffff88032dc80000(0000) knlGS:0000000000000000 [ 332.368658] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 332.370690] CR2: 00000000000000ec CR3: 000000000200a003 CR4: 00000000003606e0 [ 332.372724] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [ 332.374773] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 [ 332.376798] Call Trace: [ 332.378809] process_one_work+0x1a1/0x350 [ 332.380806] worker_thread+0x30/0x380 [ 332.382777] ? wq_update_unbound_numa+0x10/0x10 [ 332.384772] kthread+0x112/0x130 [ 332.386740] ? kthread_create_worker_on_cpu+0x70/0x70 [ 332.388706] ret_from_fork+0x35/0x40 [ 332.390651] Modules linked in: i915(O) vfat fat joydev btusb btrtl btbcm btintel bluetooth ecdh_generic iTCO_wdt wmi_bmof i2c_algo_bit drm_kms_helper intel_rapl syscopyarea sysfillrect x86_pkg_temp_thermal sysimgblt coretemp fb_sys_fops crc32_pclmul drm psmouse pcspkr mei_me mei i2c_i801 lpc_ich mfd_core i2c_core tpm_tis tpm_tis_core thinkpad_acpi wmi tpm rfkill video crc32c_intel serio_raw ehci_pci xhci_pci ehci_hcd xhci_hcd [last unloaded: i915] [ 332.394963] CR2: 00000000000000ec This appears to be due to the fact that with an MST topology, not all intel_connector structs will have ->encoder set. So, fix this by skipping connectors without encoders in intel_hpd_irq_storm_reenable_work(). For those wondering, this bug was found on accident while simulating HPD storms using a Chamelium connected to a ThinkPad T450s (Broadwell). Changes since v1: - Check intel_connector->mst_port instead of intel_connector->encoder Signed-off-by: Lyude Paul <lyude@redhat.com> Reviewed-by: Ville Syrjälä <ville.syrjala@linux.intel.com> Cc: stable@vger.kernel.org Cc: Rodrigo Vivi <rodrigo.vivi@intel.com> Link: https://patchwork.freedesktop.org/patch/msgid/20181106213017.14563-3-lyude@redhat.com (cherry picked from commit fee61deecb1d850bf34f682a6a452e5ee51b7572) Signed-off-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com> --- drivers/gpu/drm/i915/intel_hotplug.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/intel_hotplug.c b/drivers/gpu/drm/i915/intel_hotplug.c index 648a13c6043c0..8326900a311e4 100644 --- a/drivers/gpu/drm/i915/intel_hotplug.c +++ b/drivers/gpu/drm/i915/intel_hotplug.c @@ -228,7 +228,9 @@ static void intel_hpd_irq_storm_reenable_work(struct work_struct *work) drm_for_each_connector_iter(connector, &conn_iter) { struct intel_connector *intel_connector = to_intel_connector(connector); - if (intel_connector->encoder->hpd_pin == pin) { + /* Don't check MST ports, they don't have pins */ + if (!intel_connector->mst_port && + intel_connector->encoder->hpd_pin == pin) { if (connector->polled != intel_connector->polled) DRM_DEBUG_DRIVER("Reenabling HPD on connector %s\n", connector->name); From c4f224076d00ccf30c7bd3561cceaed82628c8ce Mon Sep 17 00:00:00 2001 From: Imre Deak <imre.deak@intel.com> Date: Fri, 2 Nov 2018 20:22:00 +0200 Subject: [PATCH 373/443] drm/i915/icl: Fix power well 2 wrt. DC-off toggling order MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit To enable DC5/6 power well 2 has to be disabled as for previous platforms, so fix things up. Bspec: 4234 Fixes: 67ca07e7ac10 ("drm/i915/icl: Add power well support") Cc: Animesh Manna <animesh.manna@intel.com> Cc: Paulo Zanoni <paulo.r.zanoni@intel.com> Cc: Ville Syrjälä <ville.syrjala@linux.intel.com> Signed-off-by: Imre Deak <imre.deak@intel.com> Reviewed-by: Ville Syrjälä <ville.syrjala@linux.intel.com> Link: https://patchwork.freedesktop.org/patch/msgid/20181102182200.17219-1-imre.deak@intel.com (cherry picked from commit a33e1ece777996ddddb1f23a30f8c66422ed0b68) Signed-off-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com> --- drivers/gpu/drm/i915/intel_runtime_pm.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_runtime_pm.c b/drivers/gpu/drm/i915/intel_runtime_pm.c index 0fdabce647ab6..0a4990d8843c4 100644 --- a/drivers/gpu/drm/i915/intel_runtime_pm.c +++ b/drivers/gpu/drm/i915/intel_runtime_pm.c @@ -2748,6 +2748,12 @@ static const struct i915_power_well_desc icl_power_wells[] = { .hsw.has_fuses = true, }, }, + { + .name = "DC off", + .domains = ICL_DISPLAY_DC_OFF_POWER_DOMAINS, + .ops = &gen9_dc_off_power_well_ops, + .id = DISP_PW_ID_NONE, + }, { .name = "power well 2", .domains = ICL_PW_2_POWER_DOMAINS, @@ -2759,12 +2765,6 @@ static const struct i915_power_well_desc icl_power_wells[] = { .hsw.has_fuses = true, }, }, - { - .name = "DC off", - .domains = ICL_DISPLAY_DC_OFF_POWER_DOMAINS, - .ops = &gen9_dc_off_power_well_ops, - .id = DISP_PW_ID_NONE, - }, { .name = "power well 3", .domains = ICL_PW_3_POWER_DOMAINS, From 0a823e8fd4fd67726697854578f3584ee3a49b1d Mon Sep 17 00:00:00 2001 From: Chris Wilson <chris@chris-wilson.co.uk> Date: Thu, 8 Nov 2018 08:17:38 +0000 Subject: [PATCH 374/443] drm/i915/execlists: Force write serialisation into context image vs execution Ensure that the writes into the context image are completed prior to the register mmio to trigger execution. Although previously we were assured by the SDM that all writes are flushed before an uncached memory transaction (our mmio write to submit the context to HW for execution), we have empirical evidence to believe that this is not actually the case. Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=108656 References: https://bugs.freedesktop.org/show_bug.cgi?id=108315 References: https://bugs.freedesktop.org/show_bug.cgi?id=106887 Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> Cc: Mika Kuoppala <mika.kuoppala@linux.intel.com> Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com> Acked-by: Mika Kuoppala <mika.kuoppala@linux.intel.com> Link: https://patchwork.freedesktop.org/patch/msgid/20181108081740.25615-1-chris@chris-wilson.co.uk Cc: stable@vger.kernel.org (cherry picked from commit 987abd5c62f92ee4970b45aa077f47949974e615) Signed-off-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com> --- drivers/gpu/drm/i915/intel_lrc.c | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index 43957bb37a422..37c94a54efcbb 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -424,7 +424,8 @@ static u64 execlists_update_context(struct i915_request *rq) reg_state[CTX_RING_TAIL+1] = intel_ring_set_tail(rq->ring, rq->tail); - /* True 32b PPGTT with dynamic page allocation: update PDP + /* + * True 32b PPGTT with dynamic page allocation: update PDP * registers and point the unallocated PDPs to scratch page. * PML4 is allocated during ppgtt init, so this is not needed * in 48-bit mode. @@ -432,6 +433,17 @@ static u64 execlists_update_context(struct i915_request *rq) if (ppgtt && !i915_vm_is_48bit(&ppgtt->vm)) execlists_update_context_pdps(ppgtt, reg_state); + /* + * Make sure the context image is complete before we submit it to HW. + * + * Ostensibly, writes (including the WCB) should be flushed prior to + * an uncached write such as our mmio register access, the empirical + * evidence (esp. on Braswell) suggests that the WC write into memory + * may not be visible to the HW prior to the completion of the UC + * register write and that we may begin execution from the context + * before its image is complete leading to invalid PD chasing. + */ + wmb(); return ce->lrc_desc; } From 44a7276b30c3c15f2b7790a5729640597fb6a1df Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= <ville.syrjala@linux.intel.com> Date: Mon, 12 Nov 2018 15:36:16 +0200 Subject: [PATCH 375/443] drm/i915: Fix hpd handling for pins with two encoders MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In my haste to remove irq_port[] I accidentally changed the way we deal with hpd pins that are shared by multiple encoders (DP and HDMI for pre-DDI platforms). Previously we would only handle such pins via ->hpd_pulse(), but now we queue up the hotplug work for the HDMI encoder directly. Worse yet, we now count each hpd twice and this increment the hpd storm count twice as fast. This can lead to spurious storms being detected. Go back to the old way of doing things, ie. delegate to ->hpd_pulse() for any pin which has an encoder with that hook implemented. I don't really like the idea of adding irq_port[] back so let's loop through the encoders first to check if we have an encoder with ->hpd_pulse() for the pin, and then go through all the pins and decided on the correct course of action based on the earlier findings. I have occasionally toyed with the idea of unifying the pre-DDI HDMI and DP encoders into a single encoder as well. Besides the hotplug processing it would have the other benefit of preventing userspace from trying to enable both encoders at the same time. That is simply illegal as they share the same clock/data pins. We have some testcases that will attempt that and thus fail on many older machines. But for now let's stick to fixing just the hotplug code. Cc: stable@vger.kernel.org # 4.19+ Cc: Lyude Paul <lyude@redhat.com> Cc: Rodrigo Vivi <rodrigo.vivi@intel.com> Fixes: b6ca3eee18ba ("drm/i915: Nuke dev_priv->irq_port[]") Signed-off-by: Ville Syrjälä <ville.syrjala@linux.intel.com> Link: https://patchwork.freedesktop.org/patch/msgid/20181108200424.28371-1-ville.syrjala@linux.intel.com Reviewed-by: Lyude Paul <lyude@redhat.com> (cherry picked from commit 5a3aeca97af1b6b3498d59a7fd4e8bb95814c108) Signed-off-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com> --- drivers/gpu/drm/i915/intel_hotplug.c | 66 ++++++++++++++++++++-------- 1 file changed, 47 insertions(+), 19 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_hotplug.c b/drivers/gpu/drm/i915/intel_hotplug.c index 8326900a311e4..9a80181302372 100644 --- a/drivers/gpu/drm/i915/intel_hotplug.c +++ b/drivers/gpu/drm/i915/intel_hotplug.c @@ -397,37 +397,54 @@ void intel_hpd_irq_handler(struct drm_i915_private *dev_priv, struct intel_encoder *encoder; bool storm_detected = false; bool queue_dig = false, queue_hp = false; + u32 long_hpd_pulse_mask = 0; + u32 short_hpd_pulse_mask = 0; + enum hpd_pin pin; if (!pin_mask) return; spin_lock(&dev_priv->irq_lock); + + /* + * Determine whether ->hpd_pulse() exists for each pin, and + * whether we have a short or a long pulse. This is needed + * as each pin may have up to two encoders (HDMI and DP) and + * only the one of them (DP) will have ->hpd_pulse(). + */ for_each_intel_encoder(&dev_priv->drm, encoder) { - enum hpd_pin pin = encoder->hpd_pin; bool has_hpd_pulse = intel_encoder_has_hpd_pulse(encoder); + enum port port = encoder->port; + bool long_hpd; + pin = encoder->hpd_pin; if (!(BIT(pin) & pin_mask)) continue; - if (has_hpd_pulse) { - bool long_hpd = long_mask & BIT(pin); - enum port port = encoder->port; + if (!has_hpd_pulse) + continue; - DRM_DEBUG_DRIVER("digital hpd port %c - %s\n", port_name(port), - long_hpd ? "long" : "short"); - /* - * For long HPD pulses we want to have the digital queue happen, - * but we still want HPD storm detection to function. - */ - queue_dig = true; - if (long_hpd) { - dev_priv->hotplug.long_port_mask |= (1 << port); - } else { - /* for short HPD just trigger the digital queue */ - dev_priv->hotplug.short_port_mask |= (1 << port); - continue; - } + long_hpd = long_mask & BIT(pin); + + DRM_DEBUG_DRIVER("digital hpd port %c - %s\n", port_name(port), + long_hpd ? "long" : "short"); + queue_dig = true; + + if (long_hpd) { + long_hpd_pulse_mask |= BIT(pin); + dev_priv->hotplug.long_port_mask |= BIT(port); + } else { + short_hpd_pulse_mask |= BIT(pin); + dev_priv->hotplug.short_port_mask |= BIT(port); } + } + + /* Now process each pin just once */ + for_each_hpd_pin(pin) { + bool long_hpd; + + if (!(BIT(pin) & pin_mask)) + continue; if (dev_priv->hotplug.stats[pin].state == HPD_DISABLED) { /* @@ -444,11 +461,22 @@ void intel_hpd_irq_handler(struct drm_i915_private *dev_priv, if (dev_priv->hotplug.stats[pin].state != HPD_ENABLED) continue; - if (!has_hpd_pulse) { + /* + * Delegate to ->hpd_pulse() if one of the encoders for this + * pin has it, otherwise let the hotplug_work deal with this + * pin directly. + */ + if (((short_hpd_pulse_mask | long_hpd_pulse_mask) & BIT(pin))) { + long_hpd = long_hpd_pulse_mask & BIT(pin); + } else { dev_priv->hotplug.event_bits |= BIT(pin); + long_hpd = true; queue_hp = true; } + if (!long_hpd) + continue; + if (intel_hpd_irq_storm_detect(dev_priv, pin)) { dev_priv->hotplug.event_bits &= ~BIT(pin); storm_detected = true; From 18e962ac0781bcb70d433de3b2a325ff792b4288 Mon Sep 17 00:00:00 2001 From: Omar Sandoval <osandov@fb.com> Date: Mon, 12 Nov 2018 00:08:46 -0800 Subject: [PATCH 376/443] kyber: fix wrong strlcpy() size in trace_kyber_latency() When copying to the latency type, we should be passing LATENCY_TYPE_LEN, not DOMAIN_LEN (this isn't a problem in practice because we only pass "total" or "I/O"). Fix it by changing all of the strlcpy() calls to use sizeof(). Fixes: 6c3b7af1c975 ("kyber: add tracepoints") Reported-by: Jordan Glover <Golden_Miller83@protonmail.ch> Tested-by: Jordan Glover <Golden_Miller83@protonmail.ch> Signed-off-by: Omar Sandoval <osandov@fb.com> Signed-off-by: Jens Axboe <axboe@kernel.dk> --- include/trace/events/kyber.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/include/trace/events/kyber.h b/include/trace/events/kyber.h index a9834c37ac400..c0e7d24ca2568 100644 --- a/include/trace/events/kyber.h +++ b/include/trace/events/kyber.h @@ -31,8 +31,8 @@ TRACE_EVENT(kyber_latency, TP_fast_assign( __entry->dev = disk_devt(dev_to_disk(kobj_to_dev(q->kobj.parent))); - strlcpy(__entry->domain, domain, DOMAIN_LEN); - strlcpy(__entry->type, type, DOMAIN_LEN); + strlcpy(__entry->domain, domain, sizeof(__entry->domain)); + strlcpy(__entry->type, type, sizeof(__entry->type)); __entry->percentile = percentile; __entry->numerator = numerator; __entry->denominator = denominator; @@ -60,7 +60,7 @@ TRACE_EVENT(kyber_adjust, TP_fast_assign( __entry->dev = disk_devt(dev_to_disk(kobj_to_dev(q->kobj.parent))); - strlcpy(__entry->domain, domain, DOMAIN_LEN); + strlcpy(__entry->domain, domain, sizeof(__entry->domain)); __entry->depth = depth; ), @@ -82,7 +82,7 @@ TRACE_EVENT(kyber_throttled, TP_fast_assign( __entry->dev = disk_devt(dev_to_disk(kobj_to_dev(q->kobj.parent))); - strlcpy(__entry->domain, domain, DOMAIN_LEN); + strlcpy(__entry->domain, domain, sizeof(__entry->domain)); ), TP_printk("%d,%d %s", MAJOR(__entry->dev), MINOR(__entry->dev), From ca474b73896bf6e0c1eb8787eb217b0f80221610 Mon Sep 17 00:00:00 2001 From: Hannes Reinecke <hare@suse.com> Date: Mon, 12 Nov 2018 10:35:25 -0700 Subject: [PATCH 377/443] block: copy ioprio in __bio_clone_fast() and bounce We need to copy the io priority, too; otherwise the clone will run with a different priority than the original one. Fixes: 43b62ce3ff0a ("block: move bio io prio to a new field") Signed-off-by: Hannes Reinecke <hare@suse.com> Signed-off-by: Jean Delvare <jdelvare@suse.de> Fixed up subject, and ordered stores. Signed-off-by: Jens Axboe <axboe@kernel.dk> --- block/bio.c | 1 + block/bounce.c | 1 + 2 files changed, 2 insertions(+) diff --git a/block/bio.c b/block/bio.c index a50d59236b197..4f4d9884443b6 100644 --- a/block/bio.c +++ b/block/bio.c @@ -605,6 +605,7 @@ void __bio_clone_fast(struct bio *bio, struct bio *bio_src) if (bio_flagged(bio_src, BIO_THROTTLED)) bio_set_flag(bio, BIO_THROTTLED); bio->bi_opf = bio_src->bi_opf; + bio->bi_ioprio = bio_src->bi_ioprio; bio->bi_write_hint = bio_src->bi_write_hint; bio->bi_iter = bio_src->bi_iter; bio->bi_io_vec = bio_src->bi_io_vec; diff --git a/block/bounce.c b/block/bounce.c index 36869afc258cc..559c55bda040e 100644 --- a/block/bounce.c +++ b/block/bounce.c @@ -248,6 +248,7 @@ static struct bio *bounce_clone_bio(struct bio *bio_src, gfp_t gfp_mask, return NULL; bio->bi_disk = bio_src->bi_disk; bio->bi_opf = bio_src->bi_opf; + bio->bi_ioprio = bio_src->bi_ioprio; bio->bi_write_hint = bio_src->bi_write_hint; bio->bi_iter.bi_sector = bio_src->bi_iter.bi_sector; bio->bi_iter.bi_size = bio_src->bi_iter.bi_size; From 410b5c7b48368317af95f0113692561d01d8144e Mon Sep 17 00:00:00 2001 From: Diego Viola <diego.viola@gmail.com> Date: Mon, 12 Nov 2018 17:22:52 -0200 Subject: [PATCH 378/443] libata: blacklist SAMSUNG MZ7TD256HAFV-000L9 SSD med_power_with_dipm still causes freezes after updating the firmware to the latest version (DXT04L5Q). Set model_rev to NULL and blacklist the device. Cc: stable@vger.kernel.org Signed-off-by: Diego Viola <diego.viola@gmail.com> Reviewed-by: Hans de Goede <hdegoede@redhat.com> Signed-off-by: Jens Axboe <axboe@kernel.dk> --- drivers/ata/libata-core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/ata/libata-core.c b/drivers/ata/libata-core.c index 6e594644cb1d3..a7f5202a48152 100644 --- a/drivers/ata/libata-core.c +++ b/drivers/ata/libata-core.c @@ -4553,7 +4553,7 @@ static const struct ata_blacklist_entry ata_device_blacklist [] = { /* These specific Samsung models/firmware-revs do not handle LPM well */ { "SAMSUNG MZMPC128HBFU-000MV", "CXM14M1Q", ATA_HORKAGE_NOLPM, }, { "SAMSUNG SSD PM830 mSATA *", "CXM13D1Q", ATA_HORKAGE_NOLPM, }, - { "SAMSUNG MZ7TD256HAFV-000L9", "DXT02L5Q", ATA_HORKAGE_NOLPM, }, + { "SAMSUNG MZ7TD256HAFV-000L9", NULL, ATA_HORKAGE_NOLPM, }, /* devices that don't properly handle queued TRIM commands */ { "Micron_M500IT_*", "MU01", ATA_HORKAGE_NO_NCQ_TRIM | From 5581c670fb7ec267fc79215c6d5176b07e5f6dad Mon Sep 17 00:00:00 2001 From: shaoyunl <shaoyun.liu@amd.com> Date: Mon, 12 Nov 2018 11:19:24 -0500 Subject: [PATCH 379/443] drm/amdgpu: set system aperture to cover whole FB region MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In XGMI configuration, the FB region covers vram region from peer device, adjust system aperture to cover all of them Reviewed-by: Alex Deucher <alexander.deucher@amd.com> Reviewed-by: Christian König <christian.koenig@amd.com> Signed-off-by: shaoyunl <shaoyun.liu@amd.com> Signed-off-by: Alex Deucher <alexander.deucher@amd.com> --- drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c | 6 +++--- drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c | 6 +++--- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c b/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c index ceb7847b504f7..bfa317ad20a95 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c @@ -72,7 +72,7 @@ static void gfxhub_v1_0_init_system_aperture_regs(struct amdgpu_device *adev) /* Program the system aperture low logical page number. */ WREG32_SOC15(GC, 0, mmMC_VM_SYSTEM_APERTURE_LOW_ADDR, - min(adev->gmc.vram_start, adev->gmc.agp_start) >> 18); + min(adev->gmc.fb_start, adev->gmc.agp_start) >> 18); if (adev->asic_type == CHIP_RAVEN && adev->rev_id >= 0x8) /* @@ -82,11 +82,11 @@ static void gfxhub_v1_0_init_system_aperture_regs(struct amdgpu_device *adev) * to get rid of the VM fault and hardware hang. */ WREG32_SOC15(GC, 0, mmMC_VM_SYSTEM_APERTURE_HIGH_ADDR, - max((adev->gmc.vram_end >> 18) + 0x1, + max((adev->gmc.fb_end >> 18) + 0x1, adev->gmc.agp_end >> 18)); else WREG32_SOC15(GC, 0, mmMC_VM_SYSTEM_APERTURE_HIGH_ADDR, - max(adev->gmc.vram_end, adev->gmc.agp_end) >> 18); + max(adev->gmc.fb_end, adev->gmc.agp_end) >> 18); /* Set default page address. */ value = adev->vram_scratch.gpu_addr - adev->gmc.vram_start diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c index fd23ba1226a57..a0db67adc34ce 100644 --- a/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c +++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c @@ -90,7 +90,7 @@ static void mmhub_v1_0_init_system_aperture_regs(struct amdgpu_device *adev) /* Program the system aperture low logical page number. */ WREG32_SOC15(MMHUB, 0, mmMC_VM_SYSTEM_APERTURE_LOW_ADDR, - min(adev->gmc.vram_start, adev->gmc.agp_start) >> 18); + min(adev->gmc.fb_start, adev->gmc.agp_start) >> 18); if (adev->asic_type == CHIP_RAVEN && adev->rev_id >= 0x8) /* @@ -100,11 +100,11 @@ static void mmhub_v1_0_init_system_aperture_regs(struct amdgpu_device *adev) * to get rid of the VM fault and hardware hang. */ WREG32_SOC15(MMHUB, 0, mmMC_VM_SYSTEM_APERTURE_HIGH_ADDR, - max((adev->gmc.vram_end >> 18) + 0x1, + max((adev->gmc.fb_end >> 18) + 0x1, adev->gmc.agp_end >> 18)); else WREG32_SOC15(MMHUB, 0, mmMC_VM_SYSTEM_APERTURE_HIGH_ADDR, - max(adev->gmc.vram_end, adev->gmc.agp_end) >> 18); + max(adev->gmc.fb_end, adev->gmc.agp_end) >> 18); /* Set default page address. */ value = adev->vram_scratch.gpu_addr - adev->gmc.vram_start + From 21a446cf186570168b7281b154b1993968598aca Mon Sep 17 00:00:00 2001 From: Trond Myklebust <trond.myklebust@hammerspace.com> Date: Mon, 5 Nov 2018 11:10:50 -0500 Subject: [PATCH 380/443] NFSv4: Don't exit the state manager without clearing NFS4CLNT_MANAGER_RUNNING If we exit the NFSv4 state manager due to a umount, then we can end up leaving the NFS4CLNT_MANAGER_RUNNING flag set. If another mount causes the nfs4_client to be rereferenced before it is destroyed, then we end up never being able to recover state. Fixes: 47c2199b6eb5 ("NFSv4.1: Ensure state manager thread dies on last ...") Signed-off-by: Trond Myklebust <trond.myklebust@hammerspace.com> Cc: stable@vger.kernel.org # v4.15+ --- fs/nfs/nfs4state.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c index 62ae0fd345ad6..98d1b6a6646a0 100644 --- a/fs/nfs/nfs4state.c +++ b/fs/nfs/nfs4state.c @@ -2601,11 +2601,12 @@ static void nfs4_state_manager(struct nfs_client *clp) nfs4_clear_state_manager_bit(clp); /* Did we race with an attempt to give us more work? */ if (clp->cl_state == 0) - break; + return; if (test_and_set_bit(NFS4CLNT_MANAGER_RUNNING, &clp->cl_state) != 0) - break; + return; } while (refcount_read(&clp->cl_count) > 1); - return; + goto out_drain; + out_error: if (strlen(section)) section_sep = ": "; @@ -2613,6 +2614,7 @@ static void nfs4_state_manager(struct nfs_client *clp) " with error %d\n", section_sep, section, clp->cl_hostname, -status); ssleep(1); +out_drain: nfs4_end_drain_session(clp); nfs4_clear_state_manager_bit(clp); } From a1aa09be21fa344d1f5585aab8164bfae55f57e3 Mon Sep 17 00:00:00 2001 From: Trond Myklebust <trond.myklebust@hammerspace.com> Date: Mon, 5 Nov 2018 12:17:01 -0500 Subject: [PATCH 381/443] NFSv4: Ensure that the state manager exits the loop on SIGKILL Signed-off-by: Trond Myklebust <trond.myklebust@hammerspace.com> --- fs/nfs/nfs4state.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c index 98d1b6a6646a0..ffea578853949 100644 --- a/fs/nfs/nfs4state.c +++ b/fs/nfs/nfs4state.c @@ -2604,7 +2604,7 @@ static void nfs4_state_manager(struct nfs_client *clp) return; if (test_and_set_bit(NFS4CLNT_MANAGER_RUNNING, &clp->cl_state) != 0) return; - } while (refcount_read(&clp->cl_count) > 1); + } while (refcount_read(&clp->cl_count) > 1 && !signalled()); goto out_drain; out_error: From a652a4bc21695a57c3b8d13d222a6f8b41f100aa Mon Sep 17 00:00:00 2001 From: Trond Myklebust <trond.myklebust@hammerspace.com> Date: Mon, 12 Nov 2018 15:30:52 -0500 Subject: [PATCH 382/443] SUNRPC: Fix a Oops when destroying the RPCSEC_GSS credential cache Commit 07d02a67b7fa causes a use-after free in the RPCSEC_GSS credential destroy code, because the call to get_rpccred() in gss_destroying_context() will now always fail to increment the refcount. While we could just replace the get_rpccred() with a refcount_set(), that would have the unfortunate consequence of resurrecting a credential in the credential cache for which we are in the process of destroying the RPCSEC_GSS context. Rather than do this, we choose to make a copy that is never added to the cache and use that to destroy the context. Fixes: 07d02a67b7fa ("SUNRPC: Simplify lookup code") Signed-off-by: Trond Myklebust <trond.myklebust@hammerspace.com> --- net/sunrpc/auth_gss/auth_gss.c | 61 +++++++++++++++++++++++----------- 1 file changed, 42 insertions(+), 19 deletions(-) diff --git a/net/sunrpc/auth_gss/auth_gss.c b/net/sunrpc/auth_gss/auth_gss.c index 30f970cdc7f66..5d3f252659f19 100644 --- a/net/sunrpc/auth_gss/auth_gss.c +++ b/net/sunrpc/auth_gss/auth_gss.c @@ -1239,36 +1239,59 @@ gss_create(const struct rpc_auth_create_args *args, struct rpc_clnt *clnt) return &gss_auth->rpc_auth; } +static struct gss_cred * +gss_dup_cred(struct gss_auth *gss_auth, struct gss_cred *gss_cred) +{ + struct gss_cred *new; + + /* Make a copy of the cred so that we can reference count it */ + new = kzalloc(sizeof(*gss_cred), GFP_NOIO); + if (new) { + struct auth_cred acred = { + .uid = gss_cred->gc_base.cr_uid, + }; + struct gss_cl_ctx *ctx = + rcu_dereference_protected(gss_cred->gc_ctx, 1); + + rpcauth_init_cred(&new->gc_base, &acred, + &gss_auth->rpc_auth, + &gss_nullops); + new->gc_base.cr_flags = 1UL << RPCAUTH_CRED_UPTODATE; + new->gc_service = gss_cred->gc_service; + new->gc_principal = gss_cred->gc_principal; + kref_get(&gss_auth->kref); + rcu_assign_pointer(new->gc_ctx, ctx); + gss_get_ctx(ctx); + } + return new; +} + /* - * gss_destroying_context will cause the RPCSEC_GSS to send a NULL RPC call + * gss_send_destroy_context will cause the RPCSEC_GSS to send a NULL RPC call * to the server with the GSS control procedure field set to * RPC_GSS_PROC_DESTROY. This should normally cause the server to release * all RPCSEC_GSS state associated with that context. */ -static int -gss_destroying_context(struct rpc_cred *cred) +static void +gss_send_destroy_context(struct rpc_cred *cred) { struct gss_cred *gss_cred = container_of(cred, struct gss_cred, gc_base); struct gss_auth *gss_auth = container_of(cred->cr_auth, struct gss_auth, rpc_auth); struct gss_cl_ctx *ctx = rcu_dereference_protected(gss_cred->gc_ctx, 1); + struct gss_cred *new; struct rpc_task *task; - if (test_bit(RPCAUTH_CRED_UPTODATE, &cred->cr_flags) == 0) - return 0; - - ctx->gc_proc = RPC_GSS_PROC_DESTROY; - cred->cr_ops = &gss_nullops; - - /* Take a reference to ensure the cred will be destroyed either - * by the RPC call or by the put_rpccred() below */ - get_rpccred(cred); + new = gss_dup_cred(gss_auth, gss_cred); + if (new) { + ctx->gc_proc = RPC_GSS_PROC_DESTROY; - task = rpc_call_null(gss_auth->client, cred, RPC_TASK_ASYNC|RPC_TASK_SOFT); - if (!IS_ERR(task)) - rpc_put_task(task); + task = rpc_call_null(gss_auth->client, &new->gc_base, + RPC_TASK_ASYNC|RPC_TASK_SOFT); + if (!IS_ERR(task)) + rpc_put_task(task); - put_rpccred(cred); - return 1; + put_rpccred(&new->gc_base); + } } /* gss_destroy_cred (and gss_free_ctx) are used to clean up after failure @@ -1330,8 +1353,8 @@ static void gss_destroy_cred(struct rpc_cred *cred) { - if (gss_destroying_context(cred)) - return; + if (test_and_clear_bit(RPCAUTH_CRED_UPTODATE, &cred->cr_flags) != 0) + gss_send_destroy_context(cred); gss_destroy_nullcred(cred); } From e3d5e573a54dabdc0f9f3cb039d799323372b251 Mon Sep 17 00:00:00 2001 From: Trond Myklebust <trond.myklebust@hammerspace.com> Date: Mon, 12 Nov 2018 16:06:51 -0500 Subject: [PATCH 383/443] SUNRPC: Fix a bogus get/put in generic_key_to_expire() Signed-off-by: Trond Myklebust <trond.myklebust@hammerspace.com> --- net/sunrpc/auth_generic.c | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/net/sunrpc/auth_generic.c b/net/sunrpc/auth_generic.c index d8831b988b1e7..ab4a3be1542a0 100644 --- a/net/sunrpc/auth_generic.c +++ b/net/sunrpc/auth_generic.c @@ -281,13 +281,7 @@ static bool generic_key_to_expire(struct rpc_cred *cred) { struct auth_cred *acred = &container_of(cred, struct generic_cred, gc_base)->acred; - bool ret; - - get_rpccred(cred); - ret = test_bit(RPC_CRED_KEY_EXPIRE_SOON, &acred->ac_flags); - put_rpccred(cred); - - return ret; + return test_bit(RPC_CRED_KEY_EXPIRE_SOON, &acred->ac_flags); } static const struct rpc_credops generic_credops = { From 4ab49461d9d9b8274cc72d8656fe685ed394b8f7 Mon Sep 17 00:00:00 2001 From: Anup Patel <anup@brainfault.org> Date: Thu, 1 Nov 2018 10:40:33 +0530 Subject: [PATCH 384/443] RISC-V: defconfig: Enable printk timestamps The printk timestamps are very useful information to visually see where kernel is spending time during boot. It also helps us see the timing of hotplug events at runtime. This patch enables printk timestamps in RISC-V defconfig so that we have it enabled by default (similar to other architectures such as x86_64, arm64, etc). Signed-off-by: Anup Patel <anup@brainfault.org> Acked-by: Olof Johansson <olof@lixom.net> Signed-off-by: Palmer Dabbelt <palmer@sifive.com> --- arch/riscv/configs/defconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/riscv/configs/defconfig b/arch/riscv/configs/defconfig index 07fa9ea75fea1..ef4f15df9adf0 100644 --- a/arch/riscv/configs/defconfig +++ b/arch/riscv/configs/defconfig @@ -76,4 +76,5 @@ CONFIG_NFS_V4_1=y CONFIG_NFS_V4_2=y CONFIG_ROOT_NFS=y CONFIG_CRYPTO_USER_API_HASH=y +CONFIG_PRINTK_TIME=y # CONFIG_RCU_TRACE is not set From 10febb3ecace4b557eaa0d52c9d2c3531c1a715a Mon Sep 17 00:00:00 2001 From: David Abdurachmanov <david.abdurachmanov@gmail.com> Date: Mon, 5 Nov 2018 15:40:04 +0100 Subject: [PATCH 385/443] riscv: fix spacing in struct pt_regs Replace 8 spaces with tab to match styling. Signed-off-by: David Abdurachmanov <david.abdurachmanov@gmail.com> Signed-off-by: Palmer Dabbelt <palmer@sifive.com> --- arch/riscv/include/asm/ptrace.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/riscv/include/asm/ptrace.h b/arch/riscv/include/asm/ptrace.h index 2c5df945d43c9..bbe1862e8f80c 100644 --- a/arch/riscv/include/asm/ptrace.h +++ b/arch/riscv/include/asm/ptrace.h @@ -56,8 +56,8 @@ struct pt_regs { unsigned long sstatus; unsigned long sbadaddr; unsigned long scause; - /* a0 value before the syscall */ - unsigned long orig_a0; + /* a0 value before the syscall */ + unsigned long orig_a0; }; #ifdef CONFIG_64BIT From f157d411a9eb170d2ee6b766da7a381962017cc9 Mon Sep 17 00:00:00 2001 From: David Abdurachmanov <david.abdurachmanov@gmail.com> Date: Mon, 5 Nov 2018 15:35:37 +0100 Subject: [PATCH 386/443] riscv: add missing vdso_install target Building kernel 4.20 for Fedora as RPM fails, because riscv is missing vdso_install target in arch/riscv/Makefile. Signed-off-by: David Abdurachmanov <david.abdurachmanov@gmail.com> Signed-off-by: Palmer Dabbelt <palmer@sifive.com> --- arch/riscv/Makefile | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/arch/riscv/Makefile b/arch/riscv/Makefile index d10146197533a..4af153a182b07 100644 --- a/arch/riscv/Makefile +++ b/arch/riscv/Makefile @@ -77,4 +77,8 @@ core-y += arch/riscv/kernel/ arch/riscv/mm/ libs-y += arch/riscv/lib/ +PHONY += vdso_install +vdso_install: + $(Q)$(MAKE) $(build)=arch/riscv/kernel/vdso $@ + all: vmlinux From 85d90b91807bb0c4a0fcff6a144e73f11cda782a Mon Sep 17 00:00:00 2001 From: Olof Johansson <olof@lixom.net> Date: Tue, 30 Oct 2018 23:47:07 -0700 Subject: [PATCH 387/443] RISC-V: lib: Fix build error for 64-bit Fixes the following build error from tinyconfig: riscv64-unknown-linux-gnu-ld: kernel/sched/fair.o: in function `.L8': fair.c:(.text+0x70): undefined reference to `__lshrti3' riscv64-unknown-linux-gnu-ld: kernel/time/clocksource.o: in function `.L0 ': clocksource.c:(.text+0x334): undefined reference to `__lshrti3' Fixes: 7f47c73b355f ("RISC-V: Build tishift only on 64-bit") Signed-off-by: Olof Johansson <olof@lixom.net> Signed-off-by: Palmer Dabbelt <palmer@sifive.com> --- arch/riscv/lib/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/riscv/lib/Makefile b/arch/riscv/lib/Makefile index 5739bd05d289e..4e2e600f7d538 100644 --- a/arch/riscv/lib/Makefile +++ b/arch/riscv/lib/Makefile @@ -3,6 +3,6 @@ lib-y += memcpy.o lib-y += memset.o lib-y += uaccess.o -lib-(CONFIG_64BIT) += tishift.o +lib-$(CONFIG_64BIT) += tishift.o lib-$(CONFIG_32BIT) += udivdi3.o From ef3a61406618291c46da168ff91acaa28d85944c Mon Sep 17 00:00:00 2001 From: Olof Johansson <olof@lixom.net> Date: Tue, 30 Oct 2018 23:47:09 -0700 Subject: [PATCH 388/443] RISC-V: Silence some module warnings on 32-bit Fixes: arch/riscv/kernel/module.c: In function 'apply_r_riscv_32_rela': ./include/linux/kern_levels.h:5:18: warning: format '%llx' expects argument of type 'long long unsigned int', but argument 3 has type 'Elf32_Addr' {aka 'unsigned int'} [-Wformat=] arch/riscv/kernel/module.c:23:27: note: format string is defined here arch/riscv/kernel/module.c: In function 'apply_r_riscv_pcrel_hi20_rela': ./include/linux/kern_levels.h:5:18: warning: format '%llx' expects argument of type 'long long unsigned int', but argument 3 has type 'Elf32_Addr' {aka 'unsigned int'} [-Wformat=] arch/riscv/kernel/module.c:104:23: note: format string is defined here arch/riscv/kernel/module.c: In function 'apply_r_riscv_hi20_rela': ./include/linux/kern_levels.h:5:18: warning: format '%llx' expects argument of type 'long long unsigned int', but argument 3 has type 'Elf32_Addr' {aka 'unsigned int'} [-Wformat=] arch/riscv/kernel/module.c:146:23: note: format string is defined here arch/riscv/kernel/module.c: In function 'apply_r_riscv_got_hi20_rela': ./include/linux/kern_levels.h:5:18: warning: format '%llx' expects argument of type 'long long unsigned int', but argument 3 has type 'Elf32_Addr' {aka 'unsigned int'} [-Wformat=] arch/riscv/kernel/module.c:190:60: note: format string is defined here arch/riscv/kernel/module.c: In function 'apply_r_riscv_call_plt_rela': ./include/linux/kern_levels.h:5:18: warning: format '%llx' expects argument of type 'long long unsigned int', but argument 3 has type 'Elf32_Addr' {aka 'unsigned int'} [-Wformat=] arch/riscv/kernel/module.c:214:24: note: format string is defined here arch/riscv/kernel/module.c: In function 'apply_r_riscv_call_rela': ./include/linux/kern_levels.h:5:18: warning: format '%llx' expects argument of type 'long long unsigned int', but argument 3 has type 'Elf32_Addr' {aka 'unsigned int'} [-Wformat=] arch/riscv/kernel/module.c:236:23: note: format string is defined here Signed-off-by: Olof Johansson <olof@lixom.net> Signed-off-by: Palmer Dabbelt <palmer@sifive.com> --- arch/riscv/kernel/module.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/arch/riscv/kernel/module.c b/arch/riscv/kernel/module.c index 3303ed2cd4193..7dd308129b40f 100644 --- a/arch/riscv/kernel/module.c +++ b/arch/riscv/kernel/module.c @@ -21,7 +21,7 @@ static int apply_r_riscv_32_rela(struct module *me, u32 *location, Elf_Addr v) { if (v != (u32)v) { pr_err("%s: value %016llx out of range for 32-bit field\n", - me->name, v); + me->name, (long long)v); return -EINVAL; } *location = v; @@ -102,7 +102,7 @@ static int apply_r_riscv_pcrel_hi20_rela(struct module *me, u32 *location, if (offset != (s32)offset) { pr_err( "%s: target %016llx can not be addressed by the 32-bit offset from PC = %p\n", - me->name, v, location); + me->name, (long long)v, location); return -EINVAL; } @@ -144,7 +144,7 @@ static int apply_r_riscv_hi20_rela(struct module *me, u32 *location, if (IS_ENABLED(CMODEL_MEDLOW)) { pr_err( "%s: target %016llx can not be addressed by the 32-bit offset from PC = %p\n", - me->name, v, location); + me->name, (long long)v, location); return -EINVAL; } @@ -188,7 +188,7 @@ static int apply_r_riscv_got_hi20_rela(struct module *me, u32 *location, } else { pr_err( "%s: can not generate the GOT entry for symbol = %016llx from PC = %p\n", - me->name, v, location); + me->name, (long long)v, location); return -EINVAL; } @@ -212,7 +212,7 @@ static int apply_r_riscv_call_plt_rela(struct module *me, u32 *location, } else { pr_err( "%s: target %016llx can not be addressed by the 32-bit offset from PC = %p\n", - me->name, v, location); + me->name, (long long)v, location); return -EINVAL; } } @@ -234,7 +234,7 @@ static int apply_r_riscv_call_rela(struct module *me, u32 *location, if (offset != fill_v) { pr_err( "%s: target %016llx can not be addressed by the 32-bit offset from PC = %p\n", - me->name, v, location); + me->name, (long long)v, location); return -EINVAL; } From adf59dd2408c4536d490bee649784f0465562444 Mon Sep 17 00:00:00 2001 From: Jorge Ramirez-Ortiz <jorge.ramirez.ortiz@gmail.com> Date: Mon, 12 Nov 2018 19:41:09 +0100 Subject: [PATCH 389/443] drm/meson: venc: dmt mode must use encp The video mode for DMT is only populated to support encp. Signed-off-by: Jorge Ramirez-Ortiz <jorge.ramirez.ortiz@gmail.com> Acked-by: Neil Armstrong <narmstrong@baylibre.com> Signed-off-by: Neil Armstrong <narmstrong@baylibre.com> Link: https://patchwork.freedesktop.org/patch/msgid/1542048069-22603-1-git-send-email-jramirez@baylibre.com --- drivers/gpu/drm/meson/meson_venc.c | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/drivers/gpu/drm/meson/meson_venc.c b/drivers/gpu/drm/meson/meson_venc.c index 514245e69b384..acbbad3e322ca 100644 --- a/drivers/gpu/drm/meson/meson_venc.c +++ b/drivers/gpu/drm/meson/meson_venc.c @@ -854,6 +854,13 @@ void meson_venc_hdmi_mode_set(struct meson_drm *priv, int vic, unsigned int sof_lines; unsigned int vsync_lines; + /* Use VENCI for 480i and 576i and double HDMI pixels */ + if (mode->flags & DRM_MODE_FLAG_DBLCLK) { + hdmi_repeat = true; + use_enci = true; + venc_hdmi_latency = 1; + } + if (meson_venc_hdmi_supported_vic(vic)) { vmode = meson_venc_hdmi_get_vic_vmode(vic); if (!vmode) { @@ -865,13 +872,7 @@ void meson_venc_hdmi_mode_set(struct meson_drm *priv, int vic, } else { meson_venc_hdmi_get_dmt_vmode(mode, &vmode_dmt); vmode = &vmode_dmt; - } - - /* Use VENCI for 480i and 576i and double HDMI pixels */ - if (mode->flags & DRM_MODE_FLAG_DBLCLK) { - hdmi_repeat = true; - use_enci = true; - venc_hdmi_latency = 1; + use_enci = false; } /* Repeat VENC pixels for 480/576i/p, 720p50/60 and 1080p50/60 */ From 0d76bcc960e6057750fcf556b65da13f8bbdfd2b Mon Sep 17 00:00:00 2001 From: Bjorn Helgaas <bhelgaas@google.com> Date: Tue, 13 Nov 2018 08:38:17 -0600 Subject: [PATCH 390/443] Revert "ACPI/PCI: Pay attention to device-specific _PXM node values" This reverts commit bad7dcd94f3956bcfc0a69ef71fdf0fcca3de4a8. bad7dcd94f39 ("ACPI/PCI: Pay attention to device-specific _PXM node values") caused boot failures (no console output at all) for Martin [1] and Ingo [2] on AMD ThreadRipper systems. Revert the commit until we figure out how to safely use these device-specific _PXM values. [1] https://lore.kernel.org/linux-pci/20180912152140.3676-2-Jonathan.Cameron@huawei.com [2] https://lore.kernel.org/linux-pci/20181113071712.GA2353@gmail.com Fixes: bad7dcd94f39 ("ACPI/PCI: Pay attention to device-specific _PXM node values") Signed-off-by: Bjorn Helgaas <bhelgaas@google.com> --- drivers/pci/pci-acpi.c | 5 ----- 1 file changed, 5 deletions(-) diff --git a/drivers/pci/pci-acpi.c b/drivers/pci/pci-acpi.c index 2a4aa64685794..921db6f803403 100644 --- a/drivers/pci/pci-acpi.c +++ b/drivers/pci/pci-acpi.c @@ -793,15 +793,10 @@ static void pci_acpi_setup(struct device *dev) { struct pci_dev *pci_dev = to_pci_dev(dev); struct acpi_device *adev = ACPI_COMPANION(dev); - int node; if (!adev) return; - node = acpi_get_node(adev->handle); - if (node != NUMA_NO_NODE) - set_dev_node(dev, node); - pci_acpi_optimize_delay(pci_dev, adev->handle); pci_acpi_add_pm_notifier(adev, pci_dev); From c837243ff4017f493c7d6f4ab57278d812a86859 Mon Sep 17 00:00:00 2001 From: Philip Yang <Philip.Yang@amd.com> Date: Mon, 12 Nov 2018 14:00:45 -0500 Subject: [PATCH 391/443] drm/amdgpu: fix bug with IH ring setup MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The bug limits the IH ring wptr address to 40bit. When the system memory is bigger than 1TB, the bus address is more than 40bit, this causes the interrupt cannot be handled and cleared correctly. Reviewed-by: Christian König <christian.koenig@amd.com> Signed-off-by: Philip Yang <Philip.Yang@amd.com> Reviewed-by: Alex Deucher <alexander.deucher@amd.com> Signed-off-by: Alex Deucher <alexander.deucher@amd.com> --- drivers/gpu/drm/amd/amdgpu/vega10_ih.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/vega10_ih.c b/drivers/gpu/drm/amd/amdgpu/vega10_ih.c index a99f71797aa35..a0fda6f9252a5 100644 --- a/drivers/gpu/drm/amd/amdgpu/vega10_ih.c +++ b/drivers/gpu/drm/amd/amdgpu/vega10_ih.c @@ -129,7 +129,7 @@ static int vega10_ih_irq_init(struct amdgpu_device *adev) else wptr_off = adev->wb.gpu_addr + (adev->irq.ih.wptr_offs * 4); WREG32_SOC15(OSSSYS, 0, mmIH_RB_WPTR_ADDR_LO, lower_32_bits(wptr_off)); - WREG32_SOC15(OSSSYS, 0, mmIH_RB_WPTR_ADDR_HI, upper_32_bits(wptr_off) & 0xFF); + WREG32_SOC15(OSSSYS, 0, mmIH_RB_WPTR_ADDR_HI, upper_32_bits(wptr_off) & 0xFFFF); /* set rptr, wptr to 0 */ WREG32_SOC15(OSSSYS, 0, mmIH_RB_RPTR, 0); From 4d454e9ffdb1ef5a51ebc147b5389c96048db683 Mon Sep 17 00:00:00 2001 From: Rex Zhu <Rex.Zhu@amd.com> Date: Tue, 13 Nov 2018 11:15:56 +0800 Subject: [PATCH 392/443] drm/amd/pp: Fix truncated clock value when set watermark the clk value should be tranferred to MHz first and then transfer to uint16. otherwise, the clock value will be truncated. Reviewed-by: Alex Deucher <alexander.deucher@amd.com> Reported-by: Hersen Wu <hersenxs.wu@amd.com> Signed-off-by: Rex Zhu <Rex.Zhu@amd.com> Signed-off-by: Alex Deucher <alexander.deucher@amd.com> Cc: stable@vger.kernel.org --- .../gpu/drm/amd/powerplay/hwmgr/smu_helper.c | 32 +++++++++---------- 1 file changed, 16 insertions(+), 16 deletions(-) diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/smu_helper.c b/drivers/gpu/drm/amd/powerplay/hwmgr/smu_helper.c index 99a33c33a32c9..101c09b212ade 100644 --- a/drivers/gpu/drm/amd/powerplay/hwmgr/smu_helper.c +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/smu_helper.c @@ -713,20 +713,20 @@ int smu_set_watermarks_for_clocks_ranges(void *wt_table, for (i = 0; i < wm_with_clock_ranges->num_wm_dmif_sets; i++) { table->WatermarkRow[1][i].MinClock = cpu_to_le16((uint16_t) - (wm_with_clock_ranges->wm_dmif_clocks_ranges[i].wm_min_dcfclk_clk_in_khz) / - 1000); + (wm_with_clock_ranges->wm_dmif_clocks_ranges[i].wm_min_dcfclk_clk_in_khz / + 1000)); table->WatermarkRow[1][i].MaxClock = cpu_to_le16((uint16_t) - (wm_with_clock_ranges->wm_dmif_clocks_ranges[i].wm_max_dcfclk_clk_in_khz) / - 1000); + (wm_with_clock_ranges->wm_dmif_clocks_ranges[i].wm_max_dcfclk_clk_in_khz / + 1000)); table->WatermarkRow[1][i].MinUclk = cpu_to_le16((uint16_t) - (wm_with_clock_ranges->wm_dmif_clocks_ranges[i].wm_min_mem_clk_in_khz) / - 1000); + (wm_with_clock_ranges->wm_dmif_clocks_ranges[i].wm_min_mem_clk_in_khz / + 1000)); table->WatermarkRow[1][i].MaxUclk = cpu_to_le16((uint16_t) - (wm_with_clock_ranges->wm_dmif_clocks_ranges[i].wm_max_mem_clk_in_khz) / - 1000); + (wm_with_clock_ranges->wm_dmif_clocks_ranges[i].wm_max_mem_clk_in_khz / + 1000)); table->WatermarkRow[1][i].WmSetting = (uint8_t) wm_with_clock_ranges->wm_dmif_clocks_ranges[i].wm_set_id; } @@ -734,20 +734,20 @@ int smu_set_watermarks_for_clocks_ranges(void *wt_table, for (i = 0; i < wm_with_clock_ranges->num_wm_mcif_sets; i++) { table->WatermarkRow[0][i].MinClock = cpu_to_le16((uint16_t) - (wm_with_clock_ranges->wm_mcif_clocks_ranges[i].wm_min_socclk_clk_in_khz) / - 1000); + (wm_with_clock_ranges->wm_mcif_clocks_ranges[i].wm_min_socclk_clk_in_khz / + 1000)); table->WatermarkRow[0][i].MaxClock = cpu_to_le16((uint16_t) - (wm_with_clock_ranges->wm_mcif_clocks_ranges[i].wm_max_socclk_clk_in_khz) / - 1000); + (wm_with_clock_ranges->wm_mcif_clocks_ranges[i].wm_max_socclk_clk_in_khz / + 1000)); table->WatermarkRow[0][i].MinUclk = cpu_to_le16((uint16_t) - (wm_with_clock_ranges->wm_mcif_clocks_ranges[i].wm_min_mem_clk_in_khz) / - 1000); + (wm_with_clock_ranges->wm_mcif_clocks_ranges[i].wm_min_mem_clk_in_khz / + 1000)); table->WatermarkRow[0][i].MaxUclk = cpu_to_le16((uint16_t) - (wm_with_clock_ranges->wm_mcif_clocks_ranges[i].wm_max_mem_clk_in_khz) / - 1000); + (wm_with_clock_ranges->wm_mcif_clocks_ranges[i].wm_max_mem_clk_in_khz / + 1000)); table->WatermarkRow[0][i].WmSetting = (uint8_t) wm_with_clock_ranges->wm_mcif_clocks_ranges[i].wm_set_id; } From c1a17777eb45d9f3821f35e9869c0a08cd2e664e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christian=20K=C3=B6nig?= <christian.koenig@amd.com> Date: Mon, 12 Nov 2018 18:08:31 +0100 Subject: [PATCH 393/443] drm/amdgpu: fix huge page handling on Vega10 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We accidentially set the huge flag on the parent instead of the childs. This caused some VM faults under memory pressure. Signed-off-by: Christian König <christian.koenig@amd.com> Acked-by: Alex Deucher <alexander.deucher@amd.com> Tested-by: Samuel Pitoiset <samuel.pitoiset@gmail.com> Reviewed-by: Felix Kuehling <Felix.Kuehling@amd.com> Signed-off-by: Alex Deucher <alexander.deucher@amd.com> Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c index 352b304090602..dad0e2342df9d 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c @@ -1632,13 +1632,6 @@ static int amdgpu_vm_update_ptes(struct amdgpu_pte_update_params *params, continue; } - /* First check if the entry is already handled */ - if (cursor.pfn < frag_start) { - cursor.entry->huge = true; - amdgpu_vm_pt_next(adev, &cursor); - continue; - } - /* If it isn't already handled it can't be a huge page */ if (cursor.entry->huge) { /* Add the entry to the relocated list to update it. */ @@ -1701,8 +1694,17 @@ static int amdgpu_vm_update_ptes(struct amdgpu_pte_update_params *params, } } while (frag_start < entry_end); - if (frag >= shift) + if (amdgpu_vm_pt_descendant(adev, &cursor)) { + /* Mark all child entries as huge */ + while (cursor.pfn < frag_start) { + cursor.entry->huge = true; + amdgpu_vm_pt_next(adev, &cursor); + } + + } else if (frag >= shift) { + /* or just move on to the next on the same level. */ amdgpu_vm_pt_next(adev, &cursor); + } } return 0; From c138325fb8713472d5a0c3c7258b9131bab40725 Mon Sep 17 00:00:00 2001 From: Ondrej Mosnacek <omosnace@redhat.com> Date: Tue, 13 Nov 2018 16:16:08 +0100 Subject: [PATCH 394/443] selinux: check length properly in SCTP bind hook selinux_sctp_bind_connect() must verify if the address buffer has sufficient length before accessing the 'sa_family' field. See __sctp_connect() for a similar check. The length of the whole address ('len') is already checked in the callees. Reported-by: Qian Cai <cai@gmx.us> Fixes: d452930fd3b9 ("selinux: Add SCTP support") Cc: <stable@vger.kernel.org> # 4.17+ Cc: Richard Haines <richard_c_haines@btinternet.com> Signed-off-by: Ondrej Mosnacek <omosnace@redhat.com> Tested-by: Qian Cai <cai@gmx.us> Signed-off-by: Paul Moore <paul@paul-moore.com> --- security/selinux/hooks.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c index 18b98b5e1e3c4..fe251c6f09f10 100644 --- a/security/selinux/hooks.c +++ b/security/selinux/hooks.c @@ -5318,6 +5318,9 @@ static int selinux_sctp_bind_connect(struct sock *sk, int optname, addr_buf = address; while (walk_size < addrlen) { + if (walk_size + sizeof(sa_family_t) > addrlen) + return -EINVAL; + addr = addr_buf; switch (addr->sa_family) { case AF_UNSPEC: From dded2e159208a9edc21dd5c5f583afa28d378d39 Mon Sep 17 00:00:00 2001 From: Christophe Leroy <christophe.leroy@c-s.fr> Date: Thu, 27 Sep 2018 17:17:49 +0000 Subject: [PATCH 395/443] kdb: use correct pointer when 'btc' calls 'btt' On a powerpc 8xx, 'btc' fails as follows: Entering kdb (current=0x(ptrval), pid 282) due to Keyboard Entry kdb> btc btc: cpu status: Currently on cpu 0 Available cpus: 0 kdb_getarea: Bad address 0x0 when booting the kernel with 'debug_boot_weak_hash', it fails as well Entering kdb (current=0xba99ad80, pid 284) due to Keyboard Entry kdb> btc btc: cpu status: Currently on cpu 0 Available cpus: 0 kdb_getarea: Bad address 0xba99ad80 On other platforms, Oopses have been observed too, see https://github.com/linuxppc/linux/issues/139 This is due to btc calling 'btt' with %p pointer as an argument. This patch replaces %p by %px to get the real pointer value as expected by 'btt' Fixes: ad67b74d2469 ("printk: hash addresses printed with %p") Cc: <stable@vger.kernel.org> Signed-off-by: Christophe Leroy <christophe.leroy@c-s.fr> Reviewed-by: Daniel Thompson <daniel.thompson@linaro.org> Signed-off-by: Daniel Thompson <daniel.thompson@linaro.org> --- kernel/debug/kdb/kdb_bt.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/kernel/debug/kdb/kdb_bt.c b/kernel/debug/kdb/kdb_bt.c index 6ad4a9fcbd6f7..7921ae4fca8de 100644 --- a/kernel/debug/kdb/kdb_bt.c +++ b/kernel/debug/kdb/kdb_bt.c @@ -179,14 +179,14 @@ kdb_bt(int argc, const char **argv) kdb_printf("no process for cpu %ld\n", cpu); return 0; } - sprintf(buf, "btt 0x%p\n", KDB_TSK(cpu)); + sprintf(buf, "btt 0x%px\n", KDB_TSK(cpu)); kdb_parse(buf); return 0; } kdb_printf("btc: cpu status: "); kdb_parse("cpu\n"); for_each_online_cpu(cpu) { - sprintf(buf, "btt 0x%p\n", KDB_TSK(cpu)); + sprintf(buf, "btt 0x%px\n", KDB_TSK(cpu)); kdb_parse(buf); touch_nmi_watchdog(); } From 568fb6f42ac6851320adaea25f8f1b94de14e40a Mon Sep 17 00:00:00 2001 From: Christophe Leroy <christophe.leroy@c-s.fr> Date: Thu, 27 Sep 2018 17:17:57 +0000 Subject: [PATCH 396/443] kdb: print real address of pointers instead of hashed addresses Since commit ad67b74d2469 ("printk: hash addresses printed with %p"), all pointers printed with %p are printed with hashed addresses instead of real addresses in order to avoid leaking addresses in dmesg and syslog. But this applies to kdb too, with is unfortunate: Entering kdb (current=0x(ptrval), pid 329) due to Keyboard Entry kdb> ps 15 sleeping system daemon (state M) processes suppressed, use 'ps A' to see all. Task Addr Pid Parent [*] cpu State Thread Command 0x(ptrval) 329 328 1 0 R 0x(ptrval) *sh 0x(ptrval) 1 0 0 0 S 0x(ptrval) init 0x(ptrval) 3 2 0 0 D 0x(ptrval) rcu_gp 0x(ptrval) 4 2 0 0 D 0x(ptrval) rcu_par_gp 0x(ptrval) 5 2 0 0 D 0x(ptrval) kworker/0:0 0x(ptrval) 6 2 0 0 D 0x(ptrval) kworker/0:0H 0x(ptrval) 7 2 0 0 D 0x(ptrval) kworker/u2:0 0x(ptrval) 8 2 0 0 D 0x(ptrval) mm_percpu_wq 0x(ptrval) 10 2 0 0 D 0x(ptrval) rcu_preempt The whole purpose of kdb is to debug, and for debugging real addresses need to be known. In addition, data displayed by kdb doesn't go into dmesg. This patch replaces all %p by %px in kdb in order to display real addresses. Fixes: ad67b74d2469 ("printk: hash addresses printed with %p") Cc: <stable@vger.kernel.org> Signed-off-by: Christophe Leroy <christophe.leroy@c-s.fr> Signed-off-by: Daniel Thompson <daniel.thompson@linaro.org> --- kernel/debug/kdb/kdb_main.c | 14 +++++++------- kernel/debug/kdb/kdb_support.c | 12 ++++++------ 2 files changed, 13 insertions(+), 13 deletions(-) diff --git a/kernel/debug/kdb/kdb_main.c b/kernel/debug/kdb/kdb_main.c index bb4fe4e1a601b..959242084b404 100644 --- a/kernel/debug/kdb/kdb_main.c +++ b/kernel/debug/kdb/kdb_main.c @@ -1192,7 +1192,7 @@ static int kdb_local(kdb_reason_t reason, int error, struct pt_regs *regs, if (reason == KDB_REASON_DEBUG) { /* special case below */ } else { - kdb_printf("\nEntering kdb (current=0x%p, pid %d) ", + kdb_printf("\nEntering kdb (current=0x%px, pid %d) ", kdb_current, kdb_current ? kdb_current->pid : 0); #if defined(CONFIG_SMP) kdb_printf("on processor %d ", raw_smp_processor_id()); @@ -1208,7 +1208,7 @@ static int kdb_local(kdb_reason_t reason, int error, struct pt_regs *regs, */ switch (db_result) { case KDB_DB_BPT: - kdb_printf("\nEntering kdb (0x%p, pid %d) ", + kdb_printf("\nEntering kdb (0x%px, pid %d) ", kdb_current, kdb_current->pid); #if defined(CONFIG_SMP) kdb_printf("on processor %d ", raw_smp_processor_id()); @@ -2048,7 +2048,7 @@ static int kdb_lsmod(int argc, const char **argv) if (mod->state == MODULE_STATE_UNFORMED) continue; - kdb_printf("%-20s%8u 0x%p ", mod->name, + kdb_printf("%-20s%8u 0x%px ", mod->name, mod->core_layout.size, (void *)mod); #ifdef CONFIG_MODULE_UNLOAD kdb_printf("%4d ", module_refcount(mod)); @@ -2059,7 +2059,7 @@ static int kdb_lsmod(int argc, const char **argv) kdb_printf(" (Loading)"); else kdb_printf(" (Live)"); - kdb_printf(" 0x%p", mod->core_layout.base); + kdb_printf(" 0x%px", mod->core_layout.base); #ifdef CONFIG_MODULE_UNLOAD { @@ -2341,7 +2341,7 @@ void kdb_ps1(const struct task_struct *p) return; cpu = kdb_process_cpu(p); - kdb_printf("0x%p %8d %8d %d %4d %c 0x%p %c%s\n", + kdb_printf("0x%px %8d %8d %d %4d %c 0x%px %c%s\n", (void *)p, p->pid, p->parent->pid, kdb_task_has_cpu(p), kdb_process_cpu(p), kdb_task_state_char(p), @@ -2354,7 +2354,7 @@ void kdb_ps1(const struct task_struct *p) } else { if (KDB_TSK(cpu) != p) kdb_printf(" Error: does not match running " - "process table (0x%p)\n", KDB_TSK(cpu)); + "process table (0x%px)\n", KDB_TSK(cpu)); } } } @@ -2687,7 +2687,7 @@ int kdb_register_flags(char *cmd, for_each_kdbcmd(kp, i) { if (kp->cmd_name && (strcmp(kp->cmd_name, cmd) == 0)) { kdb_printf("Duplicate kdb command registered: " - "%s, func %p help %s\n", cmd, func, help); + "%s, func %px help %s\n", cmd, func, help); return 1; } } diff --git a/kernel/debug/kdb/kdb_support.c b/kernel/debug/kdb/kdb_support.c index 990b3cc526c80..987eb73284d2d 100644 --- a/kernel/debug/kdb/kdb_support.c +++ b/kernel/debug/kdb/kdb_support.c @@ -40,7 +40,7 @@ int kdbgetsymval(const char *symname, kdb_symtab_t *symtab) { if (KDB_DEBUG(AR)) - kdb_printf("kdbgetsymval: symname=%s, symtab=%p\n", symname, + kdb_printf("kdbgetsymval: symname=%s, symtab=%px\n", symname, symtab); memset(symtab, 0, sizeof(*symtab)); symtab->sym_start = kallsyms_lookup_name(symname); @@ -88,7 +88,7 @@ int kdbnearsym(unsigned long addr, kdb_symtab_t *symtab) char *knt1 = NULL; if (KDB_DEBUG(AR)) - kdb_printf("kdbnearsym: addr=0x%lx, symtab=%p\n", addr, symtab); + kdb_printf("kdbnearsym: addr=0x%lx, symtab=%px\n", addr, symtab); memset(symtab, 0, sizeof(*symtab)); if (addr < 4096) @@ -149,7 +149,7 @@ int kdbnearsym(unsigned long addr, kdb_symtab_t *symtab) symtab->mod_name = "kernel"; if (KDB_DEBUG(AR)) kdb_printf("kdbnearsym: returns %d symtab->sym_start=0x%lx, " - "symtab->mod_name=%p, symtab->sym_name=%p (%s)\n", ret, + "symtab->mod_name=%px, symtab->sym_name=%px (%s)\n", ret, symtab->sym_start, symtab->mod_name, symtab->sym_name, symtab->sym_name); @@ -887,13 +887,13 @@ void debug_kusage(void) __func__, dah_first); if (dah_first) { h_used = (struct debug_alloc_header *)debug_alloc_pool; - kdb_printf("%s: h_used %p size %d\n", __func__, h_used, + kdb_printf("%s: h_used %px size %d\n", __func__, h_used, h_used->size); } do { h_used = (struct debug_alloc_header *) ((char *)h_free + dah_overhead + h_free->size); - kdb_printf("%s: h_used %p size %d caller %p\n", + kdb_printf("%s: h_used %px size %d caller %px\n", __func__, h_used, h_used->size, h_used->caller); h_free = (struct debug_alloc_header *) (debug_alloc_pool + h_free->next); @@ -902,7 +902,7 @@ void debug_kusage(void) ((char *)h_free + dah_overhead + h_free->size); if ((char *)h_used - debug_alloc_pool != sizeof(debug_alloc_pool_aligned)) - kdb_printf("%s: h_used %p size %d caller %p\n", + kdb_printf("%s: h_used %px size %d caller %px\n", __func__, h_used, h_used->size, h_used->caller); out: spin_unlock(&dap_lock); From c2b94c72d93d0929f48157eef128c4f9d2e603ce Mon Sep 17 00:00:00 2001 From: Prarit Bhargava <prarit@redhat.com> Date: Thu, 20 Sep 2018 08:59:14 -0400 Subject: [PATCH 397/443] kdb: Use strscpy with destination buffer size MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit gcc 8.1.0 warns with: kernel/debug/kdb/kdb_support.c: In function ‘kallsyms_symbol_next’: kernel/debug/kdb/kdb_support.c:239:4: warning: ‘strncpy’ specified bound depends on the length of the source argument [-Wstringop-overflow=] strncpy(prefix_name, name, strlen(name)+1); ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ kernel/debug/kdb/kdb_support.c:239:31: note: length computed here Use strscpy() with the destination buffer size, and use ellipses when displaying truncated symbols. v2: Use strscpy() Signed-off-by: Prarit Bhargava <prarit@redhat.com> Cc: Jonathan Toppins <jtoppins@redhat.com> Cc: Jason Wessel <jason.wessel@windriver.com> Cc: Daniel Thompson <daniel.thompson@linaro.org> Cc: kgdb-bugreport@lists.sourceforge.net Reviewed-by: Daniel Thompson <daniel.thompson@linaro.org> Signed-off-by: Daniel Thompson <daniel.thompson@linaro.org> --- kernel/debug/kdb/kdb_io.c | 15 +++++++++------ kernel/debug/kdb/kdb_private.h | 2 +- kernel/debug/kdb/kdb_support.c | 10 +++++----- 3 files changed, 15 insertions(+), 12 deletions(-) diff --git a/kernel/debug/kdb/kdb_io.c b/kernel/debug/kdb/kdb_io.c index ed5d34925ad06..6a4b41484afe6 100644 --- a/kernel/debug/kdb/kdb_io.c +++ b/kernel/debug/kdb/kdb_io.c @@ -216,7 +216,7 @@ static char *kdb_read(char *buffer, size_t bufsize) int count; int i; int diag, dtab_count; - int key; + int key, buf_size, ret; diag = kdbgetintenv("DTABCOUNT", &dtab_count); @@ -336,9 +336,8 @@ static char *kdb_read(char *buffer, size_t bufsize) else p_tmp = tmpbuffer; len = strlen(p_tmp); - count = kallsyms_symbol_complete(p_tmp, - sizeof(tmpbuffer) - - (p_tmp - tmpbuffer)); + buf_size = sizeof(tmpbuffer) - (p_tmp - tmpbuffer); + count = kallsyms_symbol_complete(p_tmp, buf_size); if (tab == 2 && count > 0) { kdb_printf("\n%d symbols are found.", count); if (count > dtab_count) { @@ -350,9 +349,13 @@ static char *kdb_read(char *buffer, size_t bufsize) } kdb_printf("\n"); for (i = 0; i < count; i++) { - if (WARN_ON(!kallsyms_symbol_next(p_tmp, i))) + ret = kallsyms_symbol_next(p_tmp, i, buf_size); + if (WARN_ON(!ret)) break; - kdb_printf("%s ", p_tmp); + if (ret != -E2BIG) + kdb_printf("%s ", p_tmp); + else + kdb_printf("%s... ", p_tmp); *(p_tmp + len) = '\0'; } if (i >= dtab_count) diff --git a/kernel/debug/kdb/kdb_private.h b/kernel/debug/kdb/kdb_private.h index 1e5a502ba4a7b..2118d8258b7c9 100644 --- a/kernel/debug/kdb/kdb_private.h +++ b/kernel/debug/kdb/kdb_private.h @@ -83,7 +83,7 @@ typedef struct __ksymtab { unsigned long sym_start; unsigned long sym_end; } kdb_symtab_t; -extern int kallsyms_symbol_next(char *prefix_name, int flag); +extern int kallsyms_symbol_next(char *prefix_name, int flag, int buf_size); extern int kallsyms_symbol_complete(char *prefix_name, int max_len); /* Exported Symbols for kernel loadable modules to use. */ diff --git a/kernel/debug/kdb/kdb_support.c b/kernel/debug/kdb/kdb_support.c index 987eb73284d2d..b14b0925c1848 100644 --- a/kernel/debug/kdb/kdb_support.c +++ b/kernel/debug/kdb/kdb_support.c @@ -221,11 +221,13 @@ int kallsyms_symbol_complete(char *prefix_name, int max_len) * Parameters: * prefix_name prefix of a symbol name to lookup * flag 0 means search from the head, 1 means continue search. + * buf_size maximum length that can be written to prefix_name + * buffer * Returns: * 1 if a symbol matches the given prefix. * 0 if no string found */ -int kallsyms_symbol_next(char *prefix_name, int flag) +int kallsyms_symbol_next(char *prefix_name, int flag, int buf_size) { int prefix_len = strlen(prefix_name); static loff_t pos; @@ -235,10 +237,8 @@ int kallsyms_symbol_next(char *prefix_name, int flag) pos = 0; while ((name = kdb_walk_kallsyms(&pos))) { - if (strncmp(name, prefix_name, prefix_len) == 0) { - strncpy(prefix_name, name, strlen(name)+1); - return 1; - } + if (!strncmp(name, prefix_name, prefix_len)) + return strscpy(prefix_name, name, buf_size); } return 0; } From 9eb62f0e1bc70ebc9b15837a0c4e8f12a7b910cb Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" <gustavo@embeddedor.com> Date: Thu, 16 Aug 2018 09:01:41 -0500 Subject: [PATCH 398/443] kdb: kdb_main: refactor code in kdb_md_line Replace the whole switch statement with a for loop. This makes the code clearer and easy to read. This also addresses the following Coverity warnings: Addresses-Coverity-ID: 115090 ("Missing break in switch") Addresses-Coverity-ID: 115091 ("Missing break in switch") Addresses-Coverity-ID: 114700 ("Missing break in switch") Suggested-by: Daniel Thompson <daniel.thompson@linaro.org> Signed-off-by: Gustavo A. R. Silva <gustavo@embeddedor.com> Reviewed-by: Daniel Thompson <daniel.thompson@linaro.org> [daniel.thompson@linaro.org: Tiny grammar change in description] Signed-off-by: Daniel Thompson <daniel.thompson@linaro.org> --- kernel/debug/kdb/kdb_main.c | 21 +++------------------ 1 file changed, 3 insertions(+), 18 deletions(-) diff --git a/kernel/debug/kdb/kdb_main.c b/kernel/debug/kdb/kdb_main.c index 959242084b404..d72b32c66f7dd 100644 --- a/kernel/debug/kdb/kdb_main.c +++ b/kernel/debug/kdb/kdb_main.c @@ -1493,6 +1493,7 @@ static void kdb_md_line(const char *fmtstr, unsigned long addr, char cbuf[32]; char *c = cbuf; int i; + int j; unsigned long word; memset(cbuf, '\0', sizeof(cbuf)); @@ -1538,25 +1539,9 @@ static void kdb_md_line(const char *fmtstr, unsigned long addr, wc.word = word; #define printable_char(c) \ ({unsigned char __c = c; isascii(__c) && isprint(__c) ? __c : '.'; }) - switch (bytesperword) { - case 8: + for (j = 0; j < bytesperword; j++) *c++ = printable_char(*cp++); - *c++ = printable_char(*cp++); - *c++ = printable_char(*cp++); - *c++ = printable_char(*cp++); - addr += 4; - case 4: - *c++ = printable_char(*cp++); - *c++ = printable_char(*cp++); - addr += 2; - case 2: - *c++ = printable_char(*cp++); - addr++; - case 1: - *c++ = printable_char(*cp++); - addr++; - break; - } + addr += bytesperword; #undef printable_char } } From 01cb37351bafc1b44b962842926210115e231f0a Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" <gustavo@embeddedor.com> Date: Sat, 4 Aug 2018 23:18:25 -0500 Subject: [PATCH 399/443] kdb: kdb_keyboard: mark expected switch fall-throughs In preparation to enabling -Wimplicit-fallthrough, mark switch cases where we are expecting to fall through. Notice that in this particular case, I replaced the code comments with a proper "fall through" annotation, which is what GCC is expecting to find. Signed-off-by: Gustavo A. R. Silva <gustavo@embeddedor.com> Reviewed-by: Daniel Thompson <daniel.thompson@linaro.org> Signed-off-by: Daniel Thompson <daniel.thompson@linaro.org> --- kernel/debug/kdb/kdb_keyboard.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/kernel/debug/kdb/kdb_keyboard.c b/kernel/debug/kdb/kdb_keyboard.c index 118527aa60eae..750497b0003a6 100644 --- a/kernel/debug/kdb/kdb_keyboard.c +++ b/kernel/debug/kdb/kdb_keyboard.c @@ -173,11 +173,11 @@ int kdb_get_kbd_char(void) case KT_LATIN: if (isprint(keychar)) break; /* printable characters */ - /* drop through */ + /* fall through */ case KT_SPEC: if (keychar == K_ENTER) break; - /* drop through */ + /* fall through */ default: return -1; /* ignore unprintables */ } From 646558ff1643467d3b941b47f519867cbca462c3 Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" <gustavo@embeddedor.com> Date: Sat, 4 Aug 2018 21:48:44 -0500 Subject: [PATCH 400/443] kdb: kdb_support: mark expected switch fall-throughs In preparation to enabling -Wimplicit-fallthrough, mark switch cases where we are expecting to fall through. Notice that in this particular case, I replaced the code comments with a proper "fall through" annotation, which is what GCC is expecting to find. Signed-off-by: Gustavo A. R. Silva <gustavo@embeddedor.com> Reviewed-by: Daniel Thompson <daniel.thompson@linaro.org> Signed-off-by: Daniel Thompson <daniel.thompson@linaro.org> --- kernel/debug/kdb/kdb_support.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/kernel/debug/kdb/kdb_support.c b/kernel/debug/kdb/kdb_support.c index b14b0925c1848..50bf9b119bad0 100644 --- a/kernel/debug/kdb/kdb_support.c +++ b/kernel/debug/kdb/kdb_support.c @@ -432,7 +432,7 @@ int kdb_getphysword(unsigned long *word, unsigned long addr, size_t size) *word = w8; break; } - /* drop through */ + /* fall through */ default: diag = KDB_BADWIDTH; kdb_printf("kdb_getphysword: bad width %ld\n", (long) size); @@ -481,7 +481,7 @@ int kdb_getword(unsigned long *word, unsigned long addr, size_t size) *word = w8; break; } - /* drop through */ + /* fall through */ default: diag = KDB_BADWIDTH; kdb_printf("kdb_getword: bad width %ld\n", (long) size); @@ -525,7 +525,7 @@ int kdb_putword(unsigned long addr, unsigned long word, size_t size) diag = kdb_putarea(addr, w8); break; } - /* drop through */ + /* fall through */ default: diag = KDB_BADWIDTH; kdb_printf("kdb_putword: bad width %ld\n", (long) size); From fd35f192e42cf7c0df1e2480bfd5965e35b2f4ca Mon Sep 17 00:00:00 2001 From: Mimi Zohar <zohar@linux.ibm.com> Date: Fri, 9 Nov 2018 00:53:40 -0500 Subject: [PATCH 401/443] integrity: support new struct public_key_signature encoding field On systems with IMA-appraisal enabled with a policy requiring file signatures, the "good" signature values are stored on the filesystem as extended attributes (security.ima). Signature verification failure would normally be limited to just a particular file (eg. executable), but during boot signature verification failure could result in a system hang. Defining and requiring a new public_key_signature field requires all callers of asymmetric signature verification to be updated to reflect the change. This patch updates the integrity asymmetric_verify() caller. Fixes: 82f94f24475c ("KEYS: Provide software public key query function [ver #2]") Signed-off-by: Mimi Zohar <zohar@linux.ibm.com> Cc: David Howells <dhowells@redhat.com> Acked-by: Denis Kenzior <denkenz@gmail.com> Signed-off-by: James Morris <james.morris@microsoft.com> --- security/integrity/digsig_asymmetric.c | 1 + 1 file changed, 1 insertion(+) diff --git a/security/integrity/digsig_asymmetric.c b/security/integrity/digsig_asymmetric.c index 6dc0751445087..d775e03fbbcc7 100644 --- a/security/integrity/digsig_asymmetric.c +++ b/security/integrity/digsig_asymmetric.c @@ -106,6 +106,7 @@ int asymmetric_verify(struct key *keyring, const char *sig, pks.pkey_algo = "rsa"; pks.hash_algo = hash_algo_name[hdr->hash_algo]; + pks.encoding = "pkcs1"; pks.digest = (u8 *)data; pks.digest_size = datalen; pks.s = hdr->sig; From e39d8a186ed002854196668cb7562ffdfbc6d379 Mon Sep 17 00:00:00 2001 From: Trond Myklebust <trond.myklebust@hammerspace.com> Date: Tue, 13 Nov 2018 16:37:54 -0500 Subject: [PATCH 402/443] NFSv4: Fix an Oops during delegation callbacks If the server sends a CB_GETATTR or a CB_RECALL while the filesystem is being unmounted, then we can Oops when releasing the inode in nfs4_callback_getattr() and nfs4_callback_recall(). Signed-off-by: Trond Myklebust <trond.myklebust@hammerspace.com> --- fs/nfs/callback_proc.c | 4 ++-- fs/nfs/delegation.c | 11 +++++++++-- 2 files changed, 11 insertions(+), 4 deletions(-) diff --git a/fs/nfs/callback_proc.c b/fs/nfs/callback_proc.c index fa515d5ea5ba1..7b861bbc0b43f 100644 --- a/fs/nfs/callback_proc.c +++ b/fs/nfs/callback_proc.c @@ -66,7 +66,7 @@ __be32 nfs4_callback_getattr(void *argp, void *resp, out_iput: rcu_read_unlock(); trace_nfs4_cb_getattr(cps->clp, &args->fh, inode, -ntohl(res->status)); - iput(inode); + nfs_iput_and_deactive(inode); out: dprintk("%s: exit with status = %d\n", __func__, ntohl(res->status)); return res->status; @@ -108,7 +108,7 @@ __be32 nfs4_callback_recall(void *argp, void *resp, } trace_nfs4_cb_recall(cps->clp, &args->fh, inode, &args->stateid, -ntohl(res)); - iput(inode); + nfs_iput_and_deactive(inode); out: dprintk("%s: exit with status = %d\n", __func__, ntohl(res)); return res; diff --git a/fs/nfs/delegation.c b/fs/nfs/delegation.c index 07b8395605762..6ec2f78c1e191 100644 --- a/fs/nfs/delegation.c +++ b/fs/nfs/delegation.c @@ -850,16 +850,23 @@ nfs_delegation_find_inode_server(struct nfs_server *server, const struct nfs_fh *fhandle) { struct nfs_delegation *delegation; - struct inode *res = NULL; + struct inode *freeme, *res = NULL; list_for_each_entry_rcu(delegation, &server->delegations, super_list) { spin_lock(&delegation->lock); if (delegation->inode != NULL && nfs_compare_fh(fhandle, &NFS_I(delegation->inode)->fh) == 0) { - res = igrab(delegation->inode); + freeme = igrab(delegation->inode); + if (freeme && nfs_sb_active(freeme->i_sb)) + res = freeme; spin_unlock(&delegation->lock); if (res != NULL) return res; + if (freeme) { + rcu_read_unlock(); + iput(freeme); + rcu_read_lock(); + } return ERR_PTR(-EAGAIN); } spin_unlock(&delegation->lock); From 877181a8d9dc663f7a73f77f50af714d7888ec3b Mon Sep 17 00:00:00 2001 From: Paul Moore <paul@paul-moore.com> Date: Tue, 13 Nov 2018 21:44:33 -0500 Subject: [PATCH 403/443] selinux: fix non-MLS handling in mls_context_to_sid() Commit 95ffe194204a ("selinux: refactor mls_context_to_sid() and make it stricter") inadvertently changed how we handle labels that did not contain MLS information. This patch restores the proper behavior in mls_context_to_sid() and adds a comment explaining the proper behavior to help ensure this doesn't happen again. Fixes: 95ffe194204a ("selinux: refactor mls_context_to_sid() and make it stricter") Reported-by: Stephen Smalley <sds@tycho.nsa.gov> Signed-off-by: Paul Moore <paul@paul-moore.com> --- security/selinux/ss/mls.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/security/selinux/ss/mls.c b/security/selinux/ss/mls.c index 2fe459df3c858..b7efa2296969c 100644 --- a/security/selinux/ss/mls.c +++ b/security/selinux/ss/mls.c @@ -245,9 +245,13 @@ int mls_context_to_sid(struct policydb *pol, char *rangep[2]; if (!pol->mls_enabled) { - if ((def_sid != SECSID_NULL && oldc) || (*scontext) == '\0') - return 0; - return -EINVAL; + /* + * With no MLS, only return -EINVAL if there is a MLS field + * and it did not come from an xattr. + */ + if (oldc && def_sid == SECSID_NULL) + return -EINVAL; + return 0; } /* From 437ccdc8ce629470babdda1a7086e2f477048cbd Mon Sep 17 00:00:00 2001 From: Satheesh Rajendran <sathnaga@linux.vnet.ibm.com> Date: Thu, 8 Nov 2018 10:47:56 +0530 Subject: [PATCH 404/443] powerpc/numa: Suppress "VPHN is not supported" messages When VPHN function is not supported and during cpu hotplug event, kernel prints message 'VPHN function not supported. Disabling polling...'. Currently it prints on every hotplug event, it floods dmesg when a KVM guest tries to hotplug huge number of vcpus, let's just print once and suppress further kernel prints. Signed-off-by: Satheesh Rajendran <sathnaga@linux.vnet.ibm.com> Signed-off-by: Michael Ellerman <mpe@ellerman.id.au> --- arch/powerpc/mm/numa.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/powerpc/mm/numa.c b/arch/powerpc/mm/numa.c index 3a048e98a1323..ce28ae5ca0803 100644 --- a/arch/powerpc/mm/numa.c +++ b/arch/powerpc/mm/numa.c @@ -1178,7 +1178,7 @@ static long vphn_get_associativity(unsigned long cpu, switch (rc) { case H_FUNCTION: - printk(KERN_INFO + printk_once(KERN_INFO "VPHN is not supported. Disabling polling...\n"); stop_topology_update(); break; From 40dc948f234b73497c3278875eb08a01d5854d3f Mon Sep 17 00:00:00 2001 From: Max Filippov <jcmvbkbc@gmail.com> Date: Tue, 13 Nov 2018 23:46:42 -0800 Subject: [PATCH 405/443] xtensa: fix boot parameters address translation The bootloader may pass physical address of the boot parameters structure to the MMUv3 kernel in the register a2. Code in the _SetupMMU block in the arch/xtensa/kernel/head.S is supposed to map that physical address to the virtual address in the configured virtual memory layout. This code haven't been updated when additional 256+256 and 512+512 memory layouts were introduced and it may produce wrong addresses when used with these layouts. Cc: stable@vger.kernel.org Signed-off-by: Max Filippov <jcmvbkbc@gmail.com> --- arch/xtensa/kernel/head.S | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/arch/xtensa/kernel/head.S b/arch/xtensa/kernel/head.S index 2f76118ecf623..9053a5622d2c3 100644 --- a/arch/xtensa/kernel/head.S +++ b/arch/xtensa/kernel/head.S @@ -88,9 +88,12 @@ _SetupMMU: initialize_mmu #if defined(CONFIG_MMU) && XCHAL_HAVE_PTP_MMU && XCHAL_HAVE_SPANNING_WAY rsr a2, excsave1 - movi a3, 0x08000000 + movi a3, XCHAL_KSEG_PADDR + bltu a2, a3, 1f + sub a2, a2, a3 + movi a3, XCHAL_KSEG_SIZE bgeu a2, a3, 1f - movi a3, 0xd0000000 + movi a3, XCHAL_KSEG_CACHED_VADDR add a2, a2, a3 wsr a2, excsave1 1: From 6a67a20366f894c172734f28c5646bdbe48a46e3 Mon Sep 17 00:00:00 2001 From: Lionel Landwerlin <lionel.g.landwerlin@intel.com> Date: Mon, 12 Nov 2018 12:39:31 +0000 Subject: [PATCH 406/443] drm/i915: fix broadwell EU computation subslice_mask is an array indexed by slice, not subslice. Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com> Fixes: 8cc7669355136f ("drm/i915: store all subslice masks") Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=108712 Reviewed-by: Chris Wilson <chris@chris-wilson.co.uk> Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com> Link: https://patchwork.freedesktop.org/patch/msgid/20181112123931.2815-1-lionel.g.landwerlin@intel.com (cherry picked from commit 63ac3328f0d1d37f286e397b14d9596ed09d7ca5) Signed-off-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com> --- drivers/gpu/drm/i915/intel_device_info.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/intel_device_info.c b/drivers/gpu/drm/i915/intel_device_info.c index 0ef0c6448d53a..01fa98299bae6 100644 --- a/drivers/gpu/drm/i915/intel_device_info.c +++ b/drivers/gpu/drm/i915/intel_device_info.c @@ -474,7 +474,7 @@ static void broadwell_sseu_info_init(struct drm_i915_private *dev_priv) u8 eu_disabled_mask; u32 n_disabled; - if (!(sseu->subslice_mask[ss] & BIT(ss))) + if (!(sseu->subslice_mask[s] & BIT(ss))) /* skip disabled subslice */ continue; From a22612301ae61d78a7c0c82dc556931a35db0e91 Mon Sep 17 00:00:00 2001 From: Mika Kuoppala <mika.kuoppala@linux.intel.com> Date: Fri, 9 Nov 2018 16:09:23 +0200 Subject: [PATCH 407/443] drm/i915/icl: Drop spurious register read from icl_dbuf_slices_update Register DBUF_CTL_S2 is read and it's value is not used. As there is no explanation why we should prime the hardware with read, remove it as spurious. Fixes: aa9664ffe863 ("drm/i915/icl: Enable 2nd DBuf slice only when needed") Cc: Mahesh Kumar <mahesh1.kumar@intel.com> Cc: Rodrigo Vivi <rodrigo.vivi@intel.com> Signed-off-by: Mika Kuoppala <mika.kuoppala@linux.intel.com> Reviewed-by: Imre Deak <imre.deak@intel.com> Link: https://patchwork.freedesktop.org/patch/msgid/20181109140924.2663-1-mika.kuoppala@linux.intel.com (cherry picked from commit 8577c319b6511fbc391f3775225fecd8b979bc26) Signed-off-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com> --- drivers/gpu/drm/i915/intel_runtime_pm.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_runtime_pm.c b/drivers/gpu/drm/i915/intel_runtime_pm.c index 0a4990d8843c4..44e4491a49189 100644 --- a/drivers/gpu/drm/i915/intel_runtime_pm.c +++ b/drivers/gpu/drm/i915/intel_runtime_pm.c @@ -3176,8 +3176,7 @@ static u8 intel_dbuf_max_slices(struct drm_i915_private *dev_priv) void icl_dbuf_slices_update(struct drm_i915_private *dev_priv, u8 req_slices) { - u8 hw_enabled_slices = dev_priv->wm.skl_hw.ddb.enabled_slices; - u32 val; + const u8 hw_enabled_slices = dev_priv->wm.skl_hw.ddb.enabled_slices; bool ret; if (req_slices > intel_dbuf_max_slices(dev_priv)) { @@ -3188,7 +3187,6 @@ void icl_dbuf_slices_update(struct drm_i915_private *dev_priv, if (req_slices == hw_enabled_slices || req_slices == 0) return; - val = I915_READ(DBUF_CTL_S2); if (req_slices > hw_enabled_slices) ret = intel_dbuf_slice_set(dev_priv, DBUF_CTL_S2, true); else From 4800bf7bc8c725e955fcbc6191cc872f43f506d3 Mon Sep 17 00:00:00 2001 From: Dave Chinner <dchinner@redhat.com> Date: Wed, 14 Nov 2018 08:17:18 -0700 Subject: [PATCH 408/443] block: fix 32 bit overflow in __blkdev_issue_discard() A discard cleanup merged into 4.20-rc2 causes fstests xfs/259 to fall into an endless loop in the discard code. The test is creating a device that is exactly 2^32 sectors in size to test mkfs boundary conditions around the 32 bit sector overflow region. mkfs issues a discard for the entire device size by default, and hence this throws a sector count of 2^32 into blkdev_issue_discard(). It takes the number of sectors to discard as a sector_t - a 64 bit value. The commit ba5d73851e71 ("block: cleanup __blkdev_issue_discard") takes this sector count and casts it to a 32 bit value before comapring it against the maximum allowed discard size the device has. This truncates away the upper 32 bits, and so if the lower 32 bits of the sector count is zero, it starts issuing discards of length 0. This causes the code to fall into an endless loop, issuing a zero length discards over and over again on the same sector. Fixes: ba5d73851e71 ("block: cleanup __blkdev_issue_discard") Tested-by: Darrick J. Wong <darrick.wong@oracle.com> Reviewed-by: Darrick J. Wong <darrick.wong@oracle.com> Signed-off-by: Dave Chinner <dchinner@redhat.com> Killed pointless WARN_ON(). Signed-off-by: Jens Axboe <axboe@kernel.dk> --- block/blk-lib.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/block/blk-lib.c b/block/blk-lib.c index e8b3bb9bf3759..5f2c429d43784 100644 --- a/block/blk-lib.c +++ b/block/blk-lib.c @@ -55,9 +55,11 @@ int __blkdev_issue_discard(struct block_device *bdev, sector_t sector, return -EINVAL; while (nr_sects) { - unsigned int req_sects = min_t(unsigned int, nr_sects, + sector_t req_sects = min_t(sector_t, nr_sects, bio_allowed_max_sectors(q)); + WARN_ON_ONCE((req_sects << 9) > UINT_MAX); + bio = blk_next_bio(bio, 0, gfp_mask); bio->bi_iter.bi_sector = sector; bio_set_dev(bio, bdev); From 8dc765d438f1e42b3e8227b3b09fad7d73f4ec9a Mon Sep 17 00:00:00 2001 From: Ming Lei <ming.lei@redhat.com> Date: Wed, 14 Nov 2018 16:25:51 +0800 Subject: [PATCH 409/443] SCSI: fix queue cleanup race before queue initialization is done c2856ae2f315d ("blk-mq: quiesce queue before freeing queue") has already fixed this race, however the implied synchronize_rcu() in blk_mq_quiesce_queue() can slow down LUN probe a lot, so caused performance regression. Then 1311326cf4755c7 ("blk-mq: avoid to synchronize rcu inside blk_cleanup_queue()") tried to quiesce queue for avoiding unnecessary synchronize_rcu() only when queue initialization is done, because it is usual to see lots of inexistent LUNs which need to be probed. However, turns out it isn't safe to quiesce queue only when queue initialization is done. Because when one SCSI command is completed, the user of sending command can be waken up immediately, then the scsi device may be removed, meantime the run queue in scsi_end_request() is still in-progress, so kernel panic can be caused. In Red Hat QE lab, there are several reports about this kind of kernel panic triggered during kernel booting. This patch tries to address the issue by grabing one queue usage counter during freeing one request and the following run queue. Fixes: 1311326cf4755c7 ("blk-mq: avoid to synchronize rcu inside blk_cleanup_queue()") Cc: Andrew Jones <drjones@redhat.com> Cc: Bart Van Assche <bart.vanassche@wdc.com> Cc: linux-scsi@vger.kernel.org Cc: Martin K. Petersen <martin.petersen@oracle.com> Cc: Christoph Hellwig <hch@lst.de> Cc: James E.J. Bottomley <jejb@linux.vnet.ibm.com> Cc: stable <stable@vger.kernel.org> Cc: jianchao.wang <jianchao.w.wang@oracle.com> Signed-off-by: Ming Lei <ming.lei@redhat.com> Signed-off-by: Jens Axboe <axboe@kernel.dk> --- block/blk-core.c | 5 ++--- drivers/scsi/scsi_lib.c | 8 ++++++++ 2 files changed, 10 insertions(+), 3 deletions(-) diff --git a/block/blk-core.c b/block/blk-core.c index ce12515f9b9b9..deb56932f8c46 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -798,9 +798,8 @@ void blk_cleanup_queue(struct request_queue *q) * dispatch may still be in-progress since we dispatch requests * from more than one contexts. * - * No need to quiesce queue if it isn't initialized yet since - * blk_freeze_queue() should be enough for cases of passthrough - * request. + * We rely on driver to deal with the race in case that queue + * initialization isn't done. */ if (q->mq_ops && blk_queue_init_done(q)) blk_mq_quiesce_queue(q); diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c index c7fccbb8f5545..fa6e0c3b3aa67 100644 --- a/drivers/scsi/scsi_lib.c +++ b/drivers/scsi/scsi_lib.c @@ -697,6 +697,12 @@ static bool scsi_end_request(struct request *req, blk_status_t error, */ scsi_mq_uninit_cmd(cmd); + /* + * queue is still alive, so grab the ref for preventing it + * from being cleaned up during running queue. + */ + percpu_ref_get(&q->q_usage_counter); + __blk_mq_end_request(req, error); if (scsi_target(sdev)->single_lun || @@ -704,6 +710,8 @@ static bool scsi_end_request(struct request *req, blk_status_t error, kblockd_schedule_work(&sdev->requeue_work); else blk_mq_run_hw_queues(q, true); + + percpu_ref_put(&q->q_usage_counter); } else { unsigned long flags; From 66f93c5a02d5ba6ef17fef459143961382593212 Mon Sep 17 00:00:00 2001 From: Nicholas Piggin <npiggin@gmail.com> Date: Thu, 15 Nov 2018 12:34:27 +1000 Subject: [PATCH 410/443] powerpc/64: Fix kernel stack 16-byte alignment MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Commit 4c2de74cc869 ("powerpc/64: Interrupts save PPR on stack rather than thread_struct") changed sizeof(struct pt_regs) % 16 from 0 to 8, which causes the interrupt frame allocation on kernel entry to put the kernel stack out of alignment. Quadword (16-byte) alignment for the stack is required by both the 64-bit v1 ABI (v1.9 § 3.2.2) and the 64-bit v2 ABI (v1.1 § 2.2.2.1). Add a pad field to fix alignment, and add a BUILD_BUG_ON to catch this in future. Fixes: 4c2de74cc869 ("powerpc/64: Interrupts save PPR on stack rather than thread_struct") Signed-off-by: Nicholas Piggin <npiggin@gmail.com> Signed-off-by: Michael Ellerman <mpe@ellerman.id.au> --- arch/powerpc/include/asm/ptrace.h | 1 + arch/powerpc/kernel/setup_64.c | 2 ++ 2 files changed, 3 insertions(+) diff --git a/arch/powerpc/include/asm/ptrace.h b/arch/powerpc/include/asm/ptrace.h index f73886a1a7f51..0b8a735b6d85f 100644 --- a/arch/powerpc/include/asm/ptrace.h +++ b/arch/powerpc/include/asm/ptrace.h @@ -54,6 +54,7 @@ struct pt_regs #ifdef CONFIG_PPC64 unsigned long ppr; + unsigned long __pad; /* Maintain 16 byte interrupt stack alignment */ #endif }; #endif diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c index 2a51e4cc8246d..236c1151a3a77 100644 --- a/arch/powerpc/kernel/setup_64.c +++ b/arch/powerpc/kernel/setup_64.c @@ -636,6 +636,8 @@ static void *__init alloc_stack(unsigned long limit, int cpu) { unsigned long pa; + BUILD_BUG_ON(STACK_INT_FRAME_SIZE % 16); + pa = memblock_alloc_base_nid(THREAD_SIZE, THREAD_SIZE, limit, early_cpu_to_node(cpu), MEMBLOCK_NONE); if (!pa) { From ef1491e791308317bb9851a0ad380c4a68b58d54 Mon Sep 17 00:00:00 2001 From: Waiman Long <longman@redhat.com> Date: Wed, 14 Nov 2018 09:55:40 -0800 Subject: [PATCH 411/443] efi: Fix debugobjects warning on 'efi_rts_work' The following commit: 9dbbedaa6171 ("efi: Make efi_rts_work accessible to efi page fault handler") converted 'efi_rts_work' from an auto variable to a global variable. However, when submitting the work, INIT_WORK_ONSTACK() was still used, causing the following complaint from debugobjects: ODEBUG: object 00000000ed27b500 is NOT on stack 00000000c7d38760, but annotated. Change the macro to just INIT_WORK() to eliminate the warning. Signed-off-by: Waiman Long <longman@redhat.com> Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org> Acked-by: Sai Praneeth Prakhya <sai.praneeth.prakhya@intel.com> Cc: Linus Torvalds <torvalds@linux-foundation.org> Cc: Peter Zijlstra <peterz@infradead.org> Cc: Thomas Gleixner <tglx@linutronix.de> Cc: linux-efi@vger.kernel.org Fixes: 9dbbedaa6171 ("efi: Make efi_rts_work accessible to efi page fault handler") Link: http://lkml.kernel.org/r/20181114175544.12860-2-ard.biesheuvel@linaro.org Signed-off-by: Ingo Molnar <mingo@kernel.org> --- drivers/firmware/efi/runtime-wrappers.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/firmware/efi/runtime-wrappers.c b/drivers/firmware/efi/runtime-wrappers.c index a19d845bdb067..8903b9ccfc2b8 100644 --- a/drivers/firmware/efi/runtime-wrappers.c +++ b/drivers/firmware/efi/runtime-wrappers.c @@ -67,7 +67,7 @@ struct efi_runtime_work efi_rts_work; } \ \ init_completion(&efi_rts_work.efi_rts_comp); \ - INIT_WORK_ONSTACK(&efi_rts_work.work, efi_call_rts); \ + INIT_WORK(&efi_rts_work.work, efi_call_rts); \ efi_rts_work.arg1 = _arg1; \ efi_rts_work.arg2 = _arg2; \ efi_rts_work.arg3 = _arg3; \ From 33412b8673135b18ea42beb7f5117ed0091798b6 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel <ard.biesheuvel@linaro.org> Date: Wed, 14 Nov 2018 09:55:41 -0800 Subject: [PATCH 412/443] efi/arm: Revert deferred unmap of early memmap mapping Commit: 3ea86495aef2 ("efi/arm: preserve early mapping of UEFI memory map longer for BGRT") deferred the unmap of the early mapping of the UEFI memory map to accommodate the ACPI BGRT code, which looks up the memory type that backs the BGRT table to validate it against the requirements of the UEFI spec. Unfortunately, this causes problems on ARM, which does not permit early mappings to persist after paging_init() is called, resulting in a WARN() splat. Since we don't support the BGRT table on ARM anway, let's revert ARM to the old behaviour, which is to take down the early mapping at the end of efi_init(). Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org> Cc: Linus Torvalds <torvalds@linux-foundation.org> Cc: Peter Zijlstra <peterz@infradead.org> Cc: Thomas Gleixner <tglx@linutronix.de> Cc: linux-efi@vger.kernel.org Fixes: 3ea86495aef2 ("efi/arm: preserve early mapping of UEFI memory ...") Link: http://lkml.kernel.org/r/20181114175544.12860-3-ard.biesheuvel@linaro.org Signed-off-by: Ingo Molnar <mingo@kernel.org> --- drivers/firmware/efi/arm-init.c | 4 ++++ drivers/firmware/efi/arm-runtime.c | 2 +- drivers/firmware/efi/memmap.c | 3 +++ 3 files changed, 8 insertions(+), 1 deletion(-) diff --git a/drivers/firmware/efi/arm-init.c b/drivers/firmware/efi/arm-init.c index 388a929baf95d..1a6a77df8a5e8 100644 --- a/drivers/firmware/efi/arm-init.c +++ b/drivers/firmware/efi/arm-init.c @@ -265,6 +265,10 @@ void __init efi_init(void) (params.mmap & ~PAGE_MASK))); init_screen_info(); + + /* ARM does not permit early mappings to persist across paging_init() */ + if (IS_ENABLED(CONFIG_ARM)) + efi_memmap_unmap(); } static int __init register_gop_device(void) diff --git a/drivers/firmware/efi/arm-runtime.c b/drivers/firmware/efi/arm-runtime.c index 922cfb813109a..a00934d263c51 100644 --- a/drivers/firmware/efi/arm-runtime.c +++ b/drivers/firmware/efi/arm-runtime.c @@ -110,7 +110,7 @@ static int __init arm_enable_runtime_services(void) { u64 mapsize; - if (!efi_enabled(EFI_BOOT) || !efi_enabled(EFI_MEMMAP)) { + if (!efi_enabled(EFI_BOOT)) { pr_info("EFI services will not be available.\n"); return 0; } diff --git a/drivers/firmware/efi/memmap.c b/drivers/firmware/efi/memmap.c index fa2904fb841fe..38b686c67b177 100644 --- a/drivers/firmware/efi/memmap.c +++ b/drivers/firmware/efi/memmap.c @@ -118,6 +118,9 @@ int __init efi_memmap_init_early(struct efi_memory_map_data *data) void __init efi_memmap_unmap(void) { + if (!efi_enabled(EFI_MEMMAP)) + return; + if (!efi.memmap.late) { unsigned long size; From 72a58a63a164b4e9d2d914e65caeb551846883f1 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel <ard.biesheuvel@linaro.org> Date: Wed, 14 Nov 2018 09:55:42 -0800 Subject: [PATCH 413/443] efi/arm/libstub: Pack FDT after populating it Commit: 24d7c494ce46 ("efi/arm-stub: Round up FDT allocation to mapping size") increased the allocation size for the FDT image created by the stub to a fixed value of 2 MB, to simplify the former code that made several attempts with increasing values for the size. This is reasonable given that the allocation is of type EFI_LOADER_DATA, which is released to the kernel unless it is explicitly memblock_reserve()d by the early boot code. However, this allocation size leaked into the 'size' field of the FDT header metadata, and so the entire allocation remains occupied by the device tree binary, even if most of it is not used to store device tree information. So call fdt_pack() to shrink the FDT data structure to its minimum size after populating all the fields, so that the remaining memory is no longer wasted. Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org> Cc: <stable@vger.kernel.org> # v4.12+ Cc: Linus Torvalds <torvalds@linux-foundation.org> Cc: Peter Zijlstra <peterz@infradead.org> Cc: Thomas Gleixner <tglx@linutronix.de> Cc: linux-efi@vger.kernel.org Fixes: 24d7c494ce46 ("efi/arm-stub: Round up FDT allocation to mapping size") Link: http://lkml.kernel.org/r/20181114175544.12860-4-ard.biesheuvel@linaro.org Signed-off-by: Ingo Molnar <mingo@kernel.org> --- drivers/firmware/efi/libstub/fdt.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/firmware/efi/libstub/fdt.c b/drivers/firmware/efi/libstub/fdt.c index 8830fa601e45d..0c0d2312f4a8a 100644 --- a/drivers/firmware/efi/libstub/fdt.c +++ b/drivers/firmware/efi/libstub/fdt.c @@ -158,6 +158,10 @@ static efi_status_t update_fdt(efi_system_table_t *sys_table, void *orig_fdt, return efi_status; } } + + /* shrink the FDT back to its minimum size */ + fdt_pack(fdt); + return EFI_SUCCESS; fdt_set_fail: From eff896288872d687d9662000ec9ae11b6d61766f Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel <ard.biesheuvel@linaro.org> Date: Wed, 14 Nov 2018 09:55:43 -0800 Subject: [PATCH 414/443] efi/arm: Defer persistent reservations until after paging_init() The new memory EFI reservation feature we introduced to allow memory reservations to persist across kexec may trigger an unbounded number of calls to memblock_reserve(). The memblock subsystem can deal with this fine, but not before memblock resizing is enabled, which we can only do after paging_init(), when the memory we reallocate the array into is actually mapped. So break out the memreserve table processing into a separate routine and call it after paging_init() on arm64. On ARM, because of limited reviewing bandwidth of the maintainer, we cannot currently fix this, so instead, disable the EFI persistent memreserve entirely on ARM so we can fix it later. Tested-by: Marc Zyngier <marc.zyngier@arm.com> Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org> Cc: Linus Torvalds <torvalds@linux-foundation.org> Cc: Peter Zijlstra <peterz@infradead.org> Cc: Thomas Gleixner <tglx@linutronix.de> Cc: linux-efi@vger.kernel.org Link: http://lkml.kernel.org/r/20181114175544.12860-5-ard.biesheuvel@linaro.org Signed-off-by: Ingo Molnar <mingo@kernel.org> --- arch/arm64/kernel/setup.c | 1 + drivers/firmware/efi/efi.c | 4 ++++ drivers/firmware/efi/libstub/arm-stub.c | 3 +++ include/linux/efi.h | 7 +++++++ 4 files changed, 15 insertions(+) diff --git a/arch/arm64/kernel/setup.c b/arch/arm64/kernel/setup.c index 953e316521fca..f4fc1e0544b73 100644 --- a/arch/arm64/kernel/setup.c +++ b/arch/arm64/kernel/setup.c @@ -313,6 +313,7 @@ void __init setup_arch(char **cmdline_p) arm64_memblock_init(); paging_init(); + efi_apply_persistent_mem_reservations(); acpi_table_upgrade(); diff --git a/drivers/firmware/efi/efi.c b/drivers/firmware/efi/efi.c index 249eb70691b0f..72a4da76d2740 100644 --- a/drivers/firmware/efi/efi.c +++ b/drivers/firmware/efi/efi.c @@ -592,7 +592,11 @@ int __init efi_config_parse_tables(void *config_tables, int count, int sz, early_memunmap(tbl, sizeof(*tbl)); } + return 0; +} +int __init efi_apply_persistent_mem_reservations(void) +{ if (efi.mem_reserve != EFI_INVALID_TABLE_ADDR) { unsigned long prsv = efi.mem_reserve; diff --git a/drivers/firmware/efi/libstub/arm-stub.c b/drivers/firmware/efi/libstub/arm-stub.c index 30ac0c975f8a1..3d36142cf8120 100644 --- a/drivers/firmware/efi/libstub/arm-stub.c +++ b/drivers/firmware/efi/libstub/arm-stub.c @@ -75,6 +75,9 @@ void install_memreserve_table(efi_system_table_t *sys_table_arg) efi_guid_t memreserve_table_guid = LINUX_EFI_MEMRESERVE_TABLE_GUID; efi_status_t status; + if (IS_ENABLED(CONFIG_ARM)) + return; + status = efi_call_early(allocate_pool, EFI_LOADER_DATA, sizeof(*rsv), (void **)&rsv); if (status != EFI_SUCCESS) { diff --git a/include/linux/efi.h b/include/linux/efi.h index 845174e113ce9..100ce4a4aff6c 100644 --- a/include/linux/efi.h +++ b/include/linux/efi.h @@ -1167,6 +1167,8 @@ static inline bool efi_enabled(int feature) extern void efi_reboot(enum reboot_mode reboot_mode, const char *__unused); extern bool efi_is_table_address(unsigned long phys_addr); + +extern int efi_apply_persistent_mem_reservations(void); #else static inline bool efi_enabled(int feature) { @@ -1185,6 +1187,11 @@ static inline bool efi_is_table_address(unsigned long phys_addr) { return false; } + +static inline int efi_apply_persistent_mem_reservations(void) +{ + return 0; +} #endif extern int efi_status_to_err(efi_status_t status); From 63eb322d89c8505af9b4a3d703e85e42281ebaa0 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel <ard.biesheuvel@linaro.org> Date: Wed, 14 Nov 2018 09:55:44 -0800 Subject: [PATCH 415/443] efi: Permit calling efi_mem_reserve_persistent() from atomic context Currently, efi_mem_reserve_persistent() may not be called from atomic context, since both the kmalloc() call and the memremap() call may sleep. The kmalloc() call is easy enough to fix, but the memremap() call needs to be moved into an init hook since we cannot control the memory allocation behavior of memremap() at the call site. Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org> Cc: Linus Torvalds <torvalds@linux-foundation.org> Cc: Peter Zijlstra <peterz@infradead.org> Cc: Thomas Gleixner <tglx@linutronix.de> Cc: linux-efi@vger.kernel.org Link: http://lkml.kernel.org/r/20181114175544.12860-6-ard.biesheuvel@linaro.org Signed-off-by: Ingo Molnar <mingo@kernel.org> --- drivers/firmware/efi/efi.c | 31 +++++++++++++++++++------------ 1 file changed, 19 insertions(+), 12 deletions(-) diff --git a/drivers/firmware/efi/efi.c b/drivers/firmware/efi/efi.c index 72a4da76d2740..fad7c62cfc0e4 100644 --- a/drivers/firmware/efi/efi.c +++ b/drivers/firmware/efi/efi.c @@ -967,36 +967,43 @@ bool efi_is_table_address(unsigned long phys_addr) } static DEFINE_SPINLOCK(efi_mem_reserve_persistent_lock); +static struct linux_efi_memreserve *efi_memreserve_root __ro_after_init; int efi_mem_reserve_persistent(phys_addr_t addr, u64 size) { - struct linux_efi_memreserve *rsv, *parent; + struct linux_efi_memreserve *rsv; - if (efi.mem_reserve == EFI_INVALID_TABLE_ADDR) + if (!efi_memreserve_root) return -ENODEV; - rsv = kmalloc(sizeof(*rsv), GFP_KERNEL); + rsv = kmalloc(sizeof(*rsv), GFP_ATOMIC); if (!rsv) return -ENOMEM; - parent = memremap(efi.mem_reserve, sizeof(*rsv), MEMREMAP_WB); - if (!parent) { - kfree(rsv); - return -ENOMEM; - } - rsv->base = addr; rsv->size = size; spin_lock(&efi_mem_reserve_persistent_lock); - rsv->next = parent->next; - parent->next = __pa(rsv); + rsv->next = efi_memreserve_root->next; + efi_memreserve_root->next = __pa(rsv); spin_unlock(&efi_mem_reserve_persistent_lock); - memunmap(parent); + return 0; +} +static int __init efi_memreserve_root_init(void) +{ + if (efi.mem_reserve == EFI_INVALID_TABLE_ADDR) + return -ENODEV; + + efi_memreserve_root = memremap(efi.mem_reserve, + sizeof(*efi_memreserve_root), + MEMREMAP_WB); + if (!efi_memreserve_root) + return -ENOMEM; return 0; } +early_initcall(efi_memreserve_root_init); #ifdef CONFIG_KEXEC static int update_efi_random_seed(struct notifier_block *nb, From 2a2777990a342b05f611e78822fc4fa2d9789ade Mon Sep 17 00:00:00 2001 From: Maarten Lankhorst <maarten.lankhorst@linux.intel.com> Date: Wed, 14 Nov 2018 13:49:23 +0200 Subject: [PATCH 416/443] drm/i915: Move programming plane scaler to its own function. This cleans the code up slightly, and will make other changes easier. Signed-off-by: Maarten Lankhorst <maarten.lankhorst@linux.intel.com> Reviewed-by: Matt Roper <matthew.d.roper@intel.com> Link: https://patchwork.freedesktop.org/patch/msgid/20180920102711.4184-8-maarten.lankhorst@linux.intel.com (cherry picked from commit ab5c60bf76755d24ae8de5c1c6ac594934656ace) Signed-off-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com> --- drivers/gpu/drm/i915/intel_sprite.c | 90 +++++++++++++++++------------ 1 file changed, 52 insertions(+), 38 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_sprite.c b/drivers/gpu/drm/i915/intel_sprite.c index 5fd2f7bf39271..873adcc20109e 100644 --- a/drivers/gpu/drm/i915/intel_sprite.c +++ b/drivers/gpu/drm/i915/intel_sprite.c @@ -302,13 +302,63 @@ skl_plane_max_stride(struct intel_plane *plane, return min(8192 * cpp, 32768); } +static void +skl_program_scaler(struct drm_i915_private *dev_priv, + struct intel_plane *plane, + const struct intel_crtc_state *crtc_state, + const struct intel_plane_state *plane_state) +{ + enum plane_id plane_id = plane->id; + enum pipe pipe = plane->pipe; + int scaler_id = plane_state->scaler_id; + const struct intel_scaler *scaler = + &crtc_state->scaler_state.scalers[scaler_id]; + int crtc_x = plane_state->base.dst.x1; + int crtc_y = plane_state->base.dst.y1; + uint32_t crtc_w = drm_rect_width(&plane_state->base.dst); + uint32_t crtc_h = drm_rect_height(&plane_state->base.dst); + u16 y_hphase, uv_rgb_hphase; + u16 y_vphase, uv_rgb_vphase; + + /* Sizes are 0 based */ + crtc_w--; + crtc_h--; + + /* TODO: handle sub-pixel coordinates */ + if (plane_state->base.fb->format->format == DRM_FORMAT_NV12) { + y_hphase = skl_scaler_calc_phase(1, false); + y_vphase = skl_scaler_calc_phase(1, false); + + /* MPEG2 chroma siting convention */ + uv_rgb_hphase = skl_scaler_calc_phase(2, true); + uv_rgb_vphase = skl_scaler_calc_phase(2, false); + } else { + /* not used */ + y_hphase = 0; + y_vphase = 0; + + uv_rgb_hphase = skl_scaler_calc_phase(1, false); + uv_rgb_vphase = skl_scaler_calc_phase(1, false); + } + + I915_WRITE_FW(SKL_PS_CTRL(pipe, scaler_id), + PS_SCALER_EN | PS_PLANE_SEL(plane_id) | scaler->mode); + I915_WRITE_FW(SKL_PS_PWR_GATE(pipe, scaler_id), 0); + I915_WRITE_FW(SKL_PS_VPHASE(pipe, scaler_id), + PS_Y_PHASE(y_vphase) | PS_UV_RGB_PHASE(uv_rgb_vphase)); + I915_WRITE_FW(SKL_PS_HPHASE(pipe, scaler_id), + PS_Y_PHASE(y_hphase) | PS_UV_RGB_PHASE(uv_rgb_hphase)); + I915_WRITE_FW(SKL_PS_WIN_POS(pipe, scaler_id), (crtc_x << 16) | crtc_y); + I915_WRITE_FW(SKL_PS_WIN_SZ(pipe, scaler_id), + ((crtc_w + 1) << 16)|(crtc_h + 1)); +} + void skl_update_plane(struct intel_plane *plane, const struct intel_crtc_state *crtc_state, const struct intel_plane_state *plane_state) { struct drm_i915_private *dev_priv = to_i915(plane->base.dev); - const struct drm_framebuffer *fb = plane_state->base.fb; enum plane_id plane_id = plane->id; enum pipe pipe = plane->pipe; u32 plane_ctl = plane_state->ctl; @@ -318,8 +368,6 @@ skl_update_plane(struct intel_plane *plane, u32 aux_stride = skl_plane_stride(plane_state, 1); int crtc_x = plane_state->base.dst.x1; int crtc_y = plane_state->base.dst.y1; - uint32_t crtc_w = drm_rect_width(&plane_state->base.dst); - uint32_t crtc_h = drm_rect_height(&plane_state->base.dst); uint32_t x = plane_state->color_plane[0].x; uint32_t y = plane_state->color_plane[0].y; uint32_t src_w = drm_rect_width(&plane_state->base.src) >> 16; @@ -329,8 +377,6 @@ skl_update_plane(struct intel_plane *plane, /* Sizes are 0 based */ src_w--; src_h--; - crtc_w--; - crtc_h--; spin_lock_irqsave(&dev_priv->uncore.lock, irqflags); @@ -355,39 +401,7 @@ skl_update_plane(struct intel_plane *plane, /* program plane scaler */ if (plane_state->scaler_id >= 0) { - int scaler_id = plane_state->scaler_id; - const struct intel_scaler *scaler = - &crtc_state->scaler_state.scalers[scaler_id]; - u16 y_hphase, uv_rgb_hphase; - u16 y_vphase, uv_rgb_vphase; - - /* TODO: handle sub-pixel coordinates */ - if (fb->format->format == DRM_FORMAT_NV12) { - y_hphase = skl_scaler_calc_phase(1, false); - y_vphase = skl_scaler_calc_phase(1, false); - - /* MPEG2 chroma siting convention */ - uv_rgb_hphase = skl_scaler_calc_phase(2, true); - uv_rgb_vphase = skl_scaler_calc_phase(2, false); - } else { - /* not used */ - y_hphase = 0; - y_vphase = 0; - - uv_rgb_hphase = skl_scaler_calc_phase(1, false); - uv_rgb_vphase = skl_scaler_calc_phase(1, false); - } - - I915_WRITE_FW(SKL_PS_CTRL(pipe, scaler_id), - PS_SCALER_EN | PS_PLANE_SEL(plane_id) | scaler->mode); - I915_WRITE_FW(SKL_PS_PWR_GATE(pipe, scaler_id), 0); - I915_WRITE_FW(SKL_PS_VPHASE(pipe, scaler_id), - PS_Y_PHASE(y_vphase) | PS_UV_RGB_PHASE(uv_rgb_vphase)); - I915_WRITE_FW(SKL_PS_HPHASE(pipe, scaler_id), - PS_Y_PHASE(y_hphase) | PS_UV_RGB_PHASE(uv_rgb_hphase)); - I915_WRITE_FW(SKL_PS_WIN_POS(pipe, scaler_id), (crtc_x << 16) | crtc_y); - I915_WRITE_FW(SKL_PS_WIN_SZ(pipe, scaler_id), - ((crtc_w + 1) << 16)|(crtc_h + 1)); + skl_program_scaler(dev_priv, plane, crtc_state, plane_state); I915_WRITE_FW(PLANE_POS(pipe, plane_id), 0); } else { From 27971d613fcb5b6ad96320bc4f2c90f4d4fde768 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= <ville.syrjala@linux.intel.com> Date: Wed, 14 Nov 2018 13:49:24 +0200 Subject: [PATCH 417/443] drm/i915: Clean up skl_program_scaler() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Remove the "sizes are 0 based" stuff that is not even true for the scaler. v2: Rebase Signed-off-by: Ville Syrjälä <ville.syrjala@linux.intel.com> Link: https://patchwork.freedesktop.org/patch/msgid/20181101151736.20522-1-ville.syrjala@linux.intel.com Reviewed-by: Rodrigo Vivi <rodrigo.vivi@intel.com> (cherry picked from commit d0105af939769393d6447a04cee2d1ae12e3f09a) Signed-off-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com> --- drivers/gpu/drm/i915/intel_sprite.c | 17 +++++------------ 1 file changed, 5 insertions(+), 12 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_sprite.c b/drivers/gpu/drm/i915/intel_sprite.c index 873adcc20109e..fa7eaace5f922 100644 --- a/drivers/gpu/drm/i915/intel_sprite.c +++ b/drivers/gpu/drm/i915/intel_sprite.c @@ -303,12 +303,11 @@ skl_plane_max_stride(struct intel_plane *plane, } static void -skl_program_scaler(struct drm_i915_private *dev_priv, - struct intel_plane *plane, +skl_program_scaler(struct intel_plane *plane, const struct intel_crtc_state *crtc_state, const struct intel_plane_state *plane_state) { - enum plane_id plane_id = plane->id; + struct drm_i915_private *dev_priv = to_i915(plane->base.dev); enum pipe pipe = plane->pipe; int scaler_id = plane_state->scaler_id; const struct intel_scaler *scaler = @@ -320,10 +319,6 @@ skl_program_scaler(struct drm_i915_private *dev_priv, u16 y_hphase, uv_rgb_hphase; u16 y_vphase, uv_rgb_vphase; - /* Sizes are 0 based */ - crtc_w--; - crtc_h--; - /* TODO: handle sub-pixel coordinates */ if (plane_state->base.fb->format->format == DRM_FORMAT_NV12) { y_hphase = skl_scaler_calc_phase(1, false); @@ -342,15 +337,14 @@ skl_program_scaler(struct drm_i915_private *dev_priv, } I915_WRITE_FW(SKL_PS_CTRL(pipe, scaler_id), - PS_SCALER_EN | PS_PLANE_SEL(plane_id) | scaler->mode); + PS_SCALER_EN | PS_PLANE_SEL(plane->id) | scaler->mode); I915_WRITE_FW(SKL_PS_PWR_GATE(pipe, scaler_id), 0); I915_WRITE_FW(SKL_PS_VPHASE(pipe, scaler_id), PS_Y_PHASE(y_vphase) | PS_UV_RGB_PHASE(uv_rgb_vphase)); I915_WRITE_FW(SKL_PS_HPHASE(pipe, scaler_id), PS_Y_PHASE(y_hphase) | PS_UV_RGB_PHASE(uv_rgb_hphase)); I915_WRITE_FW(SKL_PS_WIN_POS(pipe, scaler_id), (crtc_x << 16) | crtc_y); - I915_WRITE_FW(SKL_PS_WIN_SZ(pipe, scaler_id), - ((crtc_w + 1) << 16)|(crtc_h + 1)); + I915_WRITE_FW(SKL_PS_WIN_SZ(pipe, scaler_id), (crtc_w << 16) | crtc_h); } void @@ -399,9 +393,8 @@ skl_update_plane(struct intel_plane *plane, (plane_state->color_plane[1].y << 16) | plane_state->color_plane[1].x); - /* program plane scaler */ if (plane_state->scaler_id >= 0) { - skl_program_scaler(dev_priv, plane, crtc_state, plane_state); + skl_program_scaler(plane, crtc_state, plane_state); I915_WRITE_FW(PLANE_POS(pipe, plane_id), 0); } else { From 6e8adf6f4a4fa57dd3bef6b70de96e2b7b311204 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= <ville.syrjala@linux.intel.com> Date: Wed, 14 Nov 2018 15:32:55 +0200 Subject: [PATCH 418/443] drm/i915: Account for scale factor when calculating initial phase MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit To get the initial phase correct we need to account for the scale factor as well. I forgot this initially and was mostly looking at heavily upscaled content where the minor difference between -0.5 and the proper initial phase was not readily apparent. And let's toss in a comment that tries to explain the formula a little bit. v2: The initial phase upper limit is 1.5, not 24.0! Cc: Maarten Lankhorst <maarten.lankhorst@linux.intel.com> Fixes: 0a59952b24e2 ("drm/i915: Configure SKL+ scaler initial phase correctly") Signed-off-by: Ville Syrjälä <ville.syrjala@linux.intel.com> Link: https://patchwork.freedesktop.org/patch/msgid/20181029181820.21956-1-ville.syrjala@linux.intel.com Tested-by: Juha-Pekka Heikkila <juhapekka.heikkila@gmail.com> Tested-by: Maarten Lankhorst <maarten.lankhorst@linux.intel.com> #irc Reviewed-by: Maarten Lankhorst <maarten.lankhorst@linux.intel.com> #irc (cherry picked from commit e7a278a329dd8aa2c70c564849f164cb5673689c) Signed-off-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com> --- drivers/gpu/drm/i915/intel_display.c | 45 ++++++++++++++++++++++++++-- drivers/gpu/drm/i915/intel_drv.h | 2 +- drivers/gpu/drm/i915/intel_sprite.c | 20 +++++++++---- 3 files changed, 57 insertions(+), 10 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index 23d8008a93bb6..a54843fdeb2f0 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -4850,8 +4850,31 @@ static void cpt_verify_modeset(struct drm_device *dev, int pipe) * chroma samples for both of the luma samples, and thus we don't * actually get the expected MPEG2 chroma siting convention :( * The same behaviour is observed on pre-SKL platforms as well. + * + * Theory behind the formula (note that we ignore sub-pixel + * source coordinates): + * s = source sample position + * d = destination sample position + * + * Downscaling 4:1: + * -0.5 + * | 0.0 + * | | 1.5 (initial phase) + * | | | + * v v v + * | s | s | s | s | + * | d | + * + * Upscaling 1:4: + * -0.5 + * | -0.375 (initial phase) + * | | 0.0 + * | | | + * v v v + * | s | + * | d | d | d | d | */ -u16 skl_scaler_calc_phase(int sub, bool chroma_cosited) +u16 skl_scaler_calc_phase(int sub, int scale, bool chroma_cosited) { int phase = -0x8000; u16 trip = 0; @@ -4859,6 +4882,15 @@ u16 skl_scaler_calc_phase(int sub, bool chroma_cosited) if (chroma_cosited) phase += (sub - 1) * 0x8000 / sub; + phase += scale / (2 * sub); + + /* + * Hardware initial phase limited to [-0.5:1.5]. + * Since the max hardware scale factor is 3.0, we + * should never actually excdeed 1.0 here. + */ + WARN_ON(phase < -0x8000 || phase > 0x18000); + if (phase < 0) phase = 0x10000 + phase; else @@ -5067,13 +5099,20 @@ static void skylake_pfit_enable(struct intel_crtc *crtc) if (crtc->config->pch_pfit.enabled) { u16 uv_rgb_hphase, uv_rgb_vphase; + int pfit_w, pfit_h, hscale, vscale; int id; if (WARN_ON(crtc->config->scaler_state.scaler_id < 0)) return; - uv_rgb_hphase = skl_scaler_calc_phase(1, false); - uv_rgb_vphase = skl_scaler_calc_phase(1, false); + pfit_w = (crtc->config->pch_pfit.size >> 16) & 0xFFFF; + pfit_h = crtc->config->pch_pfit.size & 0xFFFF; + + hscale = (crtc->config->pipe_src_w << 16) / pfit_w; + vscale = (crtc->config->pipe_src_h << 16) / pfit_h; + + uv_rgb_hphase = skl_scaler_calc_phase(1, hscale, false); + uv_rgb_vphase = skl_scaler_calc_phase(1, vscale, false); id = scaler_state->scaler_id; I915_WRITE(SKL_PS_CTRL(pipe, id), PS_SCALER_EN | diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h index f8dc84b2d2d34..8b298e5f012da 100644 --- a/drivers/gpu/drm/i915/intel_drv.h +++ b/drivers/gpu/drm/i915/intel_drv.h @@ -1646,7 +1646,7 @@ void intel_mode_from_pipe_config(struct drm_display_mode *mode, void intel_crtc_arm_fifo_underrun(struct intel_crtc *crtc, struct intel_crtc_state *crtc_state); -u16 skl_scaler_calc_phase(int sub, bool chroma_center); +u16 skl_scaler_calc_phase(int sub, int scale, bool chroma_center); int skl_update_scaler_crtc(struct intel_crtc_state *crtc_state); int skl_max_scale(const struct intel_crtc_state *crtc_state, u32 pixel_format); diff --git a/drivers/gpu/drm/i915/intel_sprite.c b/drivers/gpu/drm/i915/intel_sprite.c index fa7eaace5f922..d3090a7537bb9 100644 --- a/drivers/gpu/drm/i915/intel_sprite.c +++ b/drivers/gpu/drm/i915/intel_sprite.c @@ -318,22 +318,30 @@ skl_program_scaler(struct intel_plane *plane, uint32_t crtc_h = drm_rect_height(&plane_state->base.dst); u16 y_hphase, uv_rgb_hphase; u16 y_vphase, uv_rgb_vphase; + int hscale, vscale; + + hscale = drm_rect_calc_hscale(&plane_state->base.src, + &plane_state->base.dst, + 0, INT_MAX); + vscale = drm_rect_calc_vscale(&plane_state->base.src, + &plane_state->base.dst, + 0, INT_MAX); /* TODO: handle sub-pixel coordinates */ if (plane_state->base.fb->format->format == DRM_FORMAT_NV12) { - y_hphase = skl_scaler_calc_phase(1, false); - y_vphase = skl_scaler_calc_phase(1, false); + y_hphase = skl_scaler_calc_phase(1, hscale, false); + y_vphase = skl_scaler_calc_phase(1, vscale, false); /* MPEG2 chroma siting convention */ - uv_rgb_hphase = skl_scaler_calc_phase(2, true); - uv_rgb_vphase = skl_scaler_calc_phase(2, false); + uv_rgb_hphase = skl_scaler_calc_phase(2, hscale, true); + uv_rgb_vphase = skl_scaler_calc_phase(2, vscale, false); } else { /* not used */ y_hphase = 0; y_vphase = 0; - uv_rgb_hphase = skl_scaler_calc_phase(1, false); - uv_rgb_vphase = skl_scaler_calc_phase(1, false); + uv_rgb_hphase = skl_scaler_calc_phase(1, hscale, false); + uv_rgb_vphase = skl_scaler_calc_phase(1, vscale, false); } I915_WRITE_FW(SKL_PS_CTRL(pipe, scaler_id), From b2fed34a628df6118b5d4e13f49a33e15f704fa9 Mon Sep 17 00:00:00 2001 From: Gustavo Romero <gromero@linux.vnet.ibm.com> Date: Wed, 14 Nov 2018 21:33:30 -0500 Subject: [PATCH 419/443] selftests/powerpc: Adjust wild_bctr to build with old binutils Currently the selftest wild_bctr can fail to build when an old gcc is used, notably on gcc using a binutils version <= 2.27, because the assembler does not support the integer suffix UL. This patch adjusts the wild_bctr test so the REG_POISON value is still treated as an unsigned long for the shifts on compilation but the UL suffix is absent on the stringification, so the inline asm code generated has no UL suffixes. Signed-off-by: Gustavo Romero <gromero@linux.vnet.ibm.com> [mpe: Wrap long line] Signed-off-by: Michael Ellerman <mpe@ellerman.id.au> --- tools/testing/selftests/powerpc/mm/wild_bctr.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/tools/testing/selftests/powerpc/mm/wild_bctr.c b/tools/testing/selftests/powerpc/mm/wild_bctr.c index 90469a9e49d40..f2fa101c5a6ac 100644 --- a/tools/testing/selftests/powerpc/mm/wild_bctr.c +++ b/tools/testing/selftests/powerpc/mm/wild_bctr.c @@ -47,8 +47,9 @@ static int ok(void) return 0; } -#define REG_POISON 0x5a5aUL -#define POISONED_REG(n) ((REG_POISON << 48) | ((n) << 32) | (REG_POISON << 16) | (n)) +#define REG_POISON 0x5a5a +#define POISONED_REG(n) ((((unsigned long)REG_POISON) << 48) | ((n) << 32) | \ + (((unsigned long)REG_POISON) << 16) | (n)) static inline void poison_regs(void) { From c26b5aa8ef0d46035060fded475e6ab957b9f69f Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher <agruenba@redhat.com> Date: Sun, 11 Nov 2018 11:15:21 +0000 Subject: [PATCH 420/443] gfs2: Fix iomap buffer head reference counting bug GFS2 passes the inode buffer head (dibh) from gfs2_iomap_begin to gfs2_iomap_end in iomap->private. It sets that private pointer in gfs2_iomap_get. Users of gfs2_iomap_get other than gfs2_iomap_begin would have to release iomap->private, but this isn't done correctly, leading to a leak of buffer head references. To fix this, move the code for setting iomap->private from gfs2_iomap_get to gfs2_iomap_begin. Fixes: 64bc06bb32 ("gfs2: iomap buffered write support") Cc: stable@vger.kernel.org # v4.19+ Signed-off-by: Andreas Gruenbacher <agruenba@redhat.com> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org> --- fs/gfs2/bmap.c | 40 +++++++++++++++++----------------------- 1 file changed, 17 insertions(+), 23 deletions(-) diff --git a/fs/gfs2/bmap.c b/fs/gfs2/bmap.c index 38d88fcb69883..0d643306c255b 100644 --- a/fs/gfs2/bmap.c +++ b/fs/gfs2/bmap.c @@ -826,7 +826,7 @@ static int gfs2_iomap_get(struct inode *inode, loff_t pos, loff_t length, ret = gfs2_meta_inode_buffer(ip, &dibh); if (ret) goto unlock; - iomap->private = dibh; + mp->mp_bh[0] = dibh; if (gfs2_is_stuffed(ip)) { if (flags & IOMAP_WRITE) { @@ -863,9 +863,6 @@ static int gfs2_iomap_get(struct inode *inode, loff_t pos, loff_t length, len = lblock_stop - lblock + 1; iomap->length = len << inode->i_blkbits; - get_bh(dibh); - mp->mp_bh[0] = dibh; - height = ip->i_height; while ((lblock + 1) * sdp->sd_sb.sb_bsize > sdp->sd_heightsize[height]) height++; @@ -898,8 +895,6 @@ static int gfs2_iomap_get(struct inode *inode, loff_t pos, loff_t length, iomap->bdev = inode->i_sb->s_bdev; unlock: up_read(&ip->i_rw_mutex); - if (ret && dibh) - brelse(dibh); return ret; do_alloc: @@ -980,9 +975,9 @@ static void gfs2_iomap_journaled_page_done(struct inode *inode, loff_t pos, static int gfs2_iomap_begin_write(struct inode *inode, loff_t pos, loff_t length, unsigned flags, - struct iomap *iomap) + struct iomap *iomap, + struct metapath *mp) { - struct metapath mp = { .mp_aheight = 1, }; struct gfs2_inode *ip = GFS2_I(inode); struct gfs2_sbd *sdp = GFS2_SB(inode); unsigned int data_blocks = 0, ind_blocks = 0, rblocks; @@ -996,9 +991,9 @@ static int gfs2_iomap_begin_write(struct inode *inode, loff_t pos, unstuff = gfs2_is_stuffed(ip) && pos + length > gfs2_max_stuffed_size(ip); - ret = gfs2_iomap_get(inode, pos, length, flags, iomap, &mp); + ret = gfs2_iomap_get(inode, pos, length, flags, iomap, mp); if (ret) - goto out_release; + goto out_unlock; alloc_required = unstuff || iomap->type == IOMAP_HOLE; @@ -1013,7 +1008,7 @@ static int gfs2_iomap_begin_write(struct inode *inode, loff_t pos, ret = gfs2_quota_lock_check(ip, &ap); if (ret) - goto out_release; + goto out_unlock; ret = gfs2_inplace_reserve(ip, &ap); if (ret) @@ -1038,17 +1033,15 @@ static int gfs2_iomap_begin_write(struct inode *inode, loff_t pos, ret = gfs2_unstuff_dinode(ip, NULL); if (ret) goto out_trans_end; - release_metapath(&mp); - brelse(iomap->private); - iomap->private = NULL; + release_metapath(mp); ret = gfs2_iomap_get(inode, iomap->offset, iomap->length, - flags, iomap, &mp); + flags, iomap, mp); if (ret) goto out_trans_end; } if (iomap->type == IOMAP_HOLE) { - ret = gfs2_iomap_alloc(inode, iomap, flags, &mp); + ret = gfs2_iomap_alloc(inode, iomap, flags, mp); if (ret) { gfs2_trans_end(sdp); gfs2_inplace_release(ip); @@ -1056,7 +1049,6 @@ static int gfs2_iomap_begin_write(struct inode *inode, loff_t pos, goto out_qunlock; } } - release_metapath(&mp); if (gfs2_is_jdata(ip)) iomap->page_done = gfs2_iomap_journaled_page_done; return 0; @@ -1069,10 +1061,7 @@ static int gfs2_iomap_begin_write(struct inode *inode, loff_t pos, out_qunlock: if (alloc_required) gfs2_quota_unlock(ip); -out_release: - if (iomap->private) - brelse(iomap->private); - release_metapath(&mp); +out_unlock: gfs2_write_unlock(inode); return ret; } @@ -1088,10 +1077,10 @@ static int gfs2_iomap_begin(struct inode *inode, loff_t pos, loff_t length, trace_gfs2_iomap_start(ip, pos, length, flags); if ((flags & IOMAP_WRITE) && !(flags & IOMAP_DIRECT)) { - ret = gfs2_iomap_begin_write(inode, pos, length, flags, iomap); + ret = gfs2_iomap_begin_write(inode, pos, length, flags, iomap, &mp); } else { ret = gfs2_iomap_get(inode, pos, length, flags, iomap, &mp); - release_metapath(&mp); + /* * Silently fall back to buffered I/O for stuffed files or if * we've hot a hole (see gfs2_file_direct_write). @@ -1100,6 +1089,11 @@ static int gfs2_iomap_begin(struct inode *inode, loff_t pos, loff_t length, iomap->type != IOMAP_MAPPED) ret = -ENOTBLK; } + if (!ret) { + get_bh(mp.mp_bh[0]); + iomap->private = mp.mp_bh[0]; + } + release_metapath(&mp); trace_gfs2_iomap_end(ip, iomap, ret); return ret; } From ca0246bb97c23da9d267c2107c07fb77e38205c9 Mon Sep 17 00:00:00 2001 From: Vitaly Wool <vitalywool@gmail.com> Date: Fri, 16 Nov 2018 15:07:56 -0800 Subject: [PATCH 421/443] z3fold: fix possible reclaim races Reclaim and free can race on an object which is basically fine but in order for reclaim to be able to map "freed" object we need to encode object length in the handle. handle_to_chunks() is then introduced to extract object length from a handle and use it during mapping. Moreover, to avoid racing on a z3fold "headless" page release, we should not try to free that page in z3fold_free() if the reclaim bit is set. Also, in the unlikely case of trying to reclaim a page being freed, we should not proceed with that page. While at it, fix the page accounting in reclaim function. This patch supersedes "[PATCH] z3fold: fix reclaim lock-ups". Link: http://lkml.kernel.org/r/20181105162225.74e8837d03583a9b707cf559@gmail.com Signed-off-by: Vitaly Wool <vitaly.vul@sony.com> Signed-off-by: Jongseok Kim <ks77sj@gmail.com> Reported-by-by: Jongseok Kim <ks77sj@gmail.com> Reviewed-by: Snild Dolkow <snild@sony.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org> --- mm/z3fold.c | 101 ++++++++++++++++++++++++++++++++-------------------- 1 file changed, 62 insertions(+), 39 deletions(-) diff --git a/mm/z3fold.c b/mm/z3fold.c index 4b366d181f35d..aee9b0b8d9078 100644 --- a/mm/z3fold.c +++ b/mm/z3fold.c @@ -99,6 +99,7 @@ struct z3fold_header { #define NCHUNKS ((PAGE_SIZE - ZHDR_SIZE_ALIGNED) >> CHUNK_SHIFT) #define BUDDY_MASK (0x3) +#define BUDDY_SHIFT 2 /** * struct z3fold_pool - stores metadata for each z3fold pool @@ -145,7 +146,7 @@ enum z3fold_page_flags { MIDDLE_CHUNK_MAPPED, NEEDS_COMPACTING, PAGE_STALE, - UNDER_RECLAIM + PAGE_CLAIMED, /* by either reclaim or free */ }; /***************** @@ -174,7 +175,7 @@ static struct z3fold_header *init_z3fold_page(struct page *page, clear_bit(MIDDLE_CHUNK_MAPPED, &page->private); clear_bit(NEEDS_COMPACTING, &page->private); clear_bit(PAGE_STALE, &page->private); - clear_bit(UNDER_RECLAIM, &page->private); + clear_bit(PAGE_CLAIMED, &page->private); spin_lock_init(&zhdr->page_lock); kref_init(&zhdr->refcount); @@ -223,8 +224,11 @@ static unsigned long encode_handle(struct z3fold_header *zhdr, enum buddy bud) unsigned long handle; handle = (unsigned long)zhdr; - if (bud != HEADLESS) - handle += (bud + zhdr->first_num) & BUDDY_MASK; + if (bud != HEADLESS) { + handle |= (bud + zhdr->first_num) & BUDDY_MASK; + if (bud == LAST) + handle |= (zhdr->last_chunks << BUDDY_SHIFT); + } return handle; } @@ -234,6 +238,12 @@ static struct z3fold_header *handle_to_z3fold_header(unsigned long handle) return (struct z3fold_header *)(handle & PAGE_MASK); } +/* only for LAST bud, returns zero otherwise */ +static unsigned short handle_to_chunks(unsigned long handle) +{ + return (handle & ~PAGE_MASK) >> BUDDY_SHIFT; +} + /* * (handle & BUDDY_MASK) < zhdr->first_num is possible in encode_handle * but that doesn't matter. because the masking will result in the @@ -720,37 +730,39 @@ static void z3fold_free(struct z3fold_pool *pool, unsigned long handle) page = virt_to_page(zhdr); if (test_bit(PAGE_HEADLESS, &page->private)) { - /* HEADLESS page stored */ - bud = HEADLESS; - } else { - z3fold_page_lock(zhdr); - bud = handle_to_buddy(handle); - - switch (bud) { - case FIRST: - zhdr->first_chunks = 0; - break; - case MIDDLE: - zhdr->middle_chunks = 0; - zhdr->start_middle = 0; - break; - case LAST: - zhdr->last_chunks = 0; - break; - default: - pr_err("%s: unknown bud %d\n", __func__, bud); - WARN_ON(1); - z3fold_page_unlock(zhdr); - return; + /* if a headless page is under reclaim, just leave. + * NB: we use test_and_set_bit for a reason: if the bit + * has not been set before, we release this page + * immediately so we don't care about its value any more. + */ + if (!test_and_set_bit(PAGE_CLAIMED, &page->private)) { + spin_lock(&pool->lock); + list_del(&page->lru); + spin_unlock(&pool->lock); + free_z3fold_page(page); + atomic64_dec(&pool->pages_nr); } + return; } - if (bud == HEADLESS) { - spin_lock(&pool->lock); - list_del(&page->lru); - spin_unlock(&pool->lock); - free_z3fold_page(page); - atomic64_dec(&pool->pages_nr); + /* Non-headless case */ + z3fold_page_lock(zhdr); + bud = handle_to_buddy(handle); + + switch (bud) { + case FIRST: + zhdr->first_chunks = 0; + break; + case MIDDLE: + zhdr->middle_chunks = 0; + break; + case LAST: + zhdr->last_chunks = 0; + break; + default: + pr_err("%s: unknown bud %d\n", __func__, bud); + WARN_ON(1); + z3fold_page_unlock(zhdr); return; } @@ -758,7 +770,7 @@ static void z3fold_free(struct z3fold_pool *pool, unsigned long handle) atomic64_dec(&pool->pages_nr); return; } - if (test_bit(UNDER_RECLAIM, &page->private)) { + if (test_bit(PAGE_CLAIMED, &page->private)) { z3fold_page_unlock(zhdr); return; } @@ -836,20 +848,30 @@ static int z3fold_reclaim_page(struct z3fold_pool *pool, unsigned int retries) } list_for_each_prev(pos, &pool->lru) { page = list_entry(pos, struct page, lru); + + /* this bit could have been set by free, in which case + * we pass over to the next page in the pool. + */ + if (test_and_set_bit(PAGE_CLAIMED, &page->private)) + continue; + + zhdr = page_address(page); if (test_bit(PAGE_HEADLESS, &page->private)) - /* candidate found */ break; - zhdr = page_address(page); - if (!z3fold_page_trylock(zhdr)) + if (!z3fold_page_trylock(zhdr)) { + zhdr = NULL; continue; /* can't evict at this point */ + } kref_get(&zhdr->refcount); list_del_init(&zhdr->buddy); zhdr->cpu = -1; - set_bit(UNDER_RECLAIM, &page->private); break; } + if (!zhdr) + break; + list_del_init(&page->lru); spin_unlock(&pool->lock); @@ -898,6 +920,7 @@ static int z3fold_reclaim_page(struct z3fold_pool *pool, unsigned int retries) if (test_bit(PAGE_HEADLESS, &page->private)) { if (ret == 0) { free_z3fold_page(page); + atomic64_dec(&pool->pages_nr); return 0; } spin_lock(&pool->lock); @@ -905,7 +928,7 @@ static int z3fold_reclaim_page(struct z3fold_pool *pool, unsigned int retries) spin_unlock(&pool->lock); } else { z3fold_page_lock(zhdr); - clear_bit(UNDER_RECLAIM, &page->private); + clear_bit(PAGE_CLAIMED, &page->private); if (kref_put(&zhdr->refcount, release_z3fold_page_locked)) { atomic64_dec(&pool->pages_nr); @@ -964,7 +987,7 @@ static void *z3fold_map(struct z3fold_pool *pool, unsigned long handle) set_bit(MIDDLE_CHUNK_MAPPED, &page->private); break; case LAST: - addr += PAGE_SIZE - (zhdr->last_chunks << CHUNK_SHIFT); + addr += PAGE_SIZE - (handle_to_chunks(handle) << CHUNK_SHIFT); break; default: pr_err("unknown buddy id %d\n", buddy); From 8fcb2312d1e3300e81aa871aad00d4c038cfc184 Mon Sep 17 00:00:00 2001 From: Olof Johansson <olof@lixom.net> Date: Fri, 16 Nov 2018 15:08:00 -0800 Subject: [PATCH 422/443] kernel/sched/psi.c: simplify cgroup_move_task() The existing code triggered an invalid warning about 'rq' possibly being used uninitialized. Instead of doing the silly warning suppression by initializa it to NULL, refactor the code to bail out early instead. Warning was: kernel/sched/psi.c: In function `cgroup_move_task': kernel/sched/psi.c:639:13: warning: `rq' may be used uninitialized in this function [-Wmaybe-uninitialized] Link: http://lkml.kernel.org/r/20181103183339.8669-1-olof@lixom.net Fixes: 2ce7135adc9ad ("psi: cgroup support") Signed-off-by: Olof Johansson <olof@lixom.net> Reviewed-by: Andrew Morton <akpm@linux-foundation.org> Acked-by: Johannes Weiner <hannes@cmpxchg.org> Cc: Ingo Molnar <mingo@redhat.com> Cc: Peter Zijlstra <peterz@infradead.org> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org> --- kernel/sched/psi.c | 43 ++++++++++++++++++++++--------------------- 1 file changed, 22 insertions(+), 21 deletions(-) diff --git a/kernel/sched/psi.c b/kernel/sched/psi.c index 7cdecfc010af8..3d7355d7c3e38 100644 --- a/kernel/sched/psi.c +++ b/kernel/sched/psi.c @@ -633,38 +633,39 @@ void psi_cgroup_free(struct cgroup *cgroup) */ void cgroup_move_task(struct task_struct *task, struct css_set *to) { - bool move_psi = !psi_disabled; unsigned int task_flags = 0; struct rq_flags rf; struct rq *rq; - if (move_psi) { - rq = task_rq_lock(task, &rf); + if (psi_disabled) { + /* + * Lame to do this here, but the scheduler cannot be locked + * from the outside, so we move cgroups from inside sched/. + */ + rcu_assign_pointer(task->cgroups, to); + return; + } - if (task_on_rq_queued(task)) - task_flags = TSK_RUNNING; - else if (task->in_iowait) - task_flags = TSK_IOWAIT; + rq = task_rq_lock(task, &rf); - if (task->flags & PF_MEMSTALL) - task_flags |= TSK_MEMSTALL; + if (task_on_rq_queued(task)) + task_flags = TSK_RUNNING; + else if (task->in_iowait) + task_flags = TSK_IOWAIT; - if (task_flags) - psi_task_change(task, task_flags, 0); - } + if (task->flags & PF_MEMSTALL) + task_flags |= TSK_MEMSTALL; - /* - * Lame to do this here, but the scheduler cannot be locked - * from the outside, so we move cgroups from inside sched/. - */ + if (task_flags) + psi_task_change(task, task_flags, 0); + + /* See comment above */ rcu_assign_pointer(task->cgroups, to); - if (move_psi) { - if (task_flags) - psi_task_change(task, 0, task_flags); + if (task_flags) + psi_task_change(task, 0, task_flags); - task_rq_unlock(rq, task, &rf); - } + task_rq_unlock(rq, task, &rf); } #endif /* CONFIG_CGROUPS */ From 5e41540c8a0f0e98c337dda8b391e5dda0cde7cf Mon Sep 17 00:00:00 2001 From: Mike Kravetz <mike.kravetz@oracle.com> Date: Fri, 16 Nov 2018 15:08:04 -0800 Subject: [PATCH 423/443] hugetlbfs: fix kernel BUG at fs/hugetlbfs/inode.c:444! This bug has been experienced several times by the Oracle DB team. The BUG is in remove_inode_hugepages() as follows: /* * If page is mapped, it was faulted in after being * unmapped in caller. Unmap (again) now after taking * the fault mutex. The mutex will prevent faults * until we finish removing the page. * * This race can only happen in the hole punch case. * Getting here in a truncate operation is a bug. */ if (unlikely(page_mapped(page))) { BUG_ON(truncate_op); In this case, the elevated map count is not the result of a race. Rather it was incorrectly incremented as the result of a bug in the huge pmd sharing code. Consider the following: - Process A maps a hugetlbfs file of sufficient size and alignment (PUD_SIZE) that a pmd page could be shared. - Process B maps the same hugetlbfs file with the same size and alignment such that a pmd page is shared. - Process B then calls mprotect() to change protections for the mapping with the shared pmd. As a result, the pmd is 'unshared'. - Process B then calls mprotect() again to chage protections for the mapping back to their original value. pmd remains unshared. - Process B then forks and process C is created. During the fork process, we do dup_mm -> dup_mmap -> copy_page_range to copy page tables. Copying page tables for hugetlb mappings is done in the routine copy_hugetlb_page_range. In copy_hugetlb_page_range(), the destination pte is obtained by: dst_pte = huge_pte_alloc(dst, addr, sz); If pmd sharing is possible, the returned pointer will be to a pte in an existing page table. In the situation above, process C could share with either process A or process B. Since process A is first in the list, the returned pte is a pointer to a pte in process A's page table. However, the check for pmd sharing in copy_hugetlb_page_range is: /* If the pagetables are shared don't copy or take references */ if (dst_pte == src_pte) continue; Since process C is sharing with process A instead of process B, the above test fails. The code in copy_hugetlb_page_range which follows assumes dst_pte points to a huge_pte_none pte. It copies the pte entry from src_pte to dst_pte and increments this map count of the associated page. This is how we end up with an elevated map count. To solve, check the dst_pte entry for huge_pte_none. If !none, this implies PMD sharing so do not copy. Link: http://lkml.kernel.org/r/20181105212315.14125-1-mike.kravetz@oracle.com Fixes: c5c99429fa57 ("fix hugepages leak due to pagetable page sharing") Signed-off-by: Mike Kravetz <mike.kravetz@oracle.com> Reviewed-by: Naoya Horiguchi <n-horiguchi@ah.jp.nec.com> Cc: Michal Hocko <mhocko@kernel.org> Cc: Hugh Dickins <hughd@google.com> Cc: Andrea Arcangeli <aarcange@redhat.com> Cc: "Kirill A . Shutemov" <kirill.shutemov@linux.intel.com> Cc: Davidlohr Bueso <dave@stgolabs.net> Cc: Prakash Sangappa <prakash.sangappa@oracle.com> Cc: <stable@vger.kernel.org> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org> --- mm/hugetlb.c | 23 +++++++++++++++++++---- 1 file changed, 19 insertions(+), 4 deletions(-) diff --git a/mm/hugetlb.c b/mm/hugetlb.c index c007fb5fb8d5f..7f2a28ab46d53 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -3233,7 +3233,7 @@ static int is_hugetlb_entry_hwpoisoned(pte_t pte) int copy_hugetlb_page_range(struct mm_struct *dst, struct mm_struct *src, struct vm_area_struct *vma) { - pte_t *src_pte, *dst_pte, entry; + pte_t *src_pte, *dst_pte, entry, dst_entry; struct page *ptepage; unsigned long addr; int cow; @@ -3261,15 +3261,30 @@ int copy_hugetlb_page_range(struct mm_struct *dst, struct mm_struct *src, break; } - /* If the pagetables are shared don't copy or take references */ - if (dst_pte == src_pte) + /* + * If the pagetables are shared don't copy or take references. + * dst_pte == src_pte is the common case of src/dest sharing. + * + * However, src could have 'unshared' and dst shares with + * another vma. If dst_pte !none, this implies sharing. + * Check here before taking page table lock, and once again + * after taking the lock below. + */ + dst_entry = huge_ptep_get(dst_pte); + if ((dst_pte == src_pte) || !huge_pte_none(dst_entry)) continue; dst_ptl = huge_pte_lock(h, dst, dst_pte); src_ptl = huge_pte_lockptr(h, src, src_pte); spin_lock_nested(src_ptl, SINGLE_DEPTH_NESTING); entry = huge_ptep_get(src_pte); - if (huge_pte_none(entry)) { /* skip none entry */ + dst_entry = huge_ptep_get(dst_pte); + if (huge_pte_none(entry) || !huge_pte_none(dst_entry)) { + /* + * Skip if src entry none. Also, skip in the + * unlikely case dst entry !none as this implies + * sharing with another vma. + */ ; } else if (unlikely(is_hugetlb_entry_migration(entry) || is_hugetlb_entry_hwpoisoned(entry))) { From f341e16fb67ddc199582d187b0d39120a9dfd2bf Mon Sep 17 00:00:00 2001 From: Aaro Koskinen <aaro.koskinen@iki.fi> Date: Fri, 16 Nov 2018 15:08:08 -0800 Subject: [PATCH 424/443] MAINTAINERS: update OMAP MMC entry Jarkko's e-mail address hasn't worked for a long time. We still want to keep this driver working as it is critical for some of the OMAP boards. I use and test this driver frequently, so change myself as a maintainer with "Odd Fixes" status. Link: http://lkml.kernel.org/r/20181106222750.12939-1-aaro.koskinen@iki.fi Signed-off-by: Aaro Koskinen <aaro.koskinen@iki.fi> Acked-by: Tony Lindgren <tony@atomide.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org> --- CREDITS | 4 ++++ MAINTAINERS | 4 ++-- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/CREDITS b/CREDITS index 5befd2d714d00..84cbec4c62115 100644 --- a/CREDITS +++ b/CREDITS @@ -2138,6 +2138,10 @@ E: paul@laufernet.com D: Soundblaster driver fixes, ISAPnP quirk S: California, USA +N: Jarkko Lavinen +E: jarkko.lavinen@nokia.com +D: OMAP MMC support + N: Jonathan Layes D: ARPD support diff --git a/MAINTAINERS b/MAINTAINERS index 6c3fbbb361f82..b755a89fa3256 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -10808,9 +10808,9 @@ F: drivers/media/platform/omap3isp/ F: drivers/staging/media/omap4iss/ OMAP MMC SUPPORT -M: Jarkko Lavinen <jarkko.lavinen@nokia.com> +M: Aaro Koskinen <aaro.koskinen@iki.fi> L: linux-omap@vger.kernel.org -S: Maintained +S: Odd Fixes F: drivers/mmc/host/omap.c OMAP POWER MANAGEMENT SUPPORT From 873d7bcfd066663e3e50113dc4a0de19289b6354 Mon Sep 17 00:00:00 2001 From: Vasily Averin <vvs@virtuozzo.com> Date: Fri, 16 Nov 2018 15:08:11 -0800 Subject: [PATCH 425/443] mm/swapfile.c: use kvzalloc for swap_info_struct allocation Commit a2468cc9bfdf ("swap: choose swap device according to numa node") changed 'avail_lists' field of 'struct swap_info_struct' to an array. In popular linux distros it increased size of swap_info_struct up to 40 Kbytes and now swap_info_struct allocation requires order-4 page. Switch to kvzmalloc allows to avoid unexpected allocation failures. Link: http://lkml.kernel.org/r/fc23172d-3c75-21e2-d551-8b1808cbe593@virtuozzo.com Fixes: a2468cc9bfdf ("swap: choose swap device according to numa node") Signed-off-by: Vasily Averin <vvs@virtuozzo.com> Acked-by: Aaron Lu <aaron.lu@intel.com> Acked-by: Michal Hocko <mhocko@suse.com> Reviewed-by: Andrew Morton <akpm@linux-foundation.org> Cc: Huang Ying <ying.huang@intel.com> Cc: <stable@vger.kernel.org> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org> --- mm/swapfile.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/mm/swapfile.c b/mm/swapfile.c index 644f746e167ac..8688ae65ef58a 100644 --- a/mm/swapfile.c +++ b/mm/swapfile.c @@ -2813,7 +2813,7 @@ static struct swap_info_struct *alloc_swap_info(void) unsigned int type; int i; - p = kzalloc(sizeof(*p), GFP_KERNEL); + p = kvzalloc(sizeof(*p), GFP_KERNEL); if (!p) return ERR_PTR(-ENOMEM); @@ -2824,7 +2824,7 @@ static struct swap_info_struct *alloc_swap_info(void) } if (type >= MAX_SWAPFILES) { spin_unlock(&swap_lock); - kfree(p); + kvfree(p); return ERR_PTR(-EPERM); } if (type >= nr_swapfiles) { @@ -2838,7 +2838,7 @@ static struct swap_info_struct *alloc_swap_info(void) smp_wmb(); nr_swapfiles++; } else { - kfree(p); + kvfree(p); p = swap_info[type]; /* * Do not memset this entry: a racing procfs swap_next() From 9d7899999c62c1a81129b76d2a6ecbc4655e1597 Mon Sep 17 00:00:00 2001 From: Michal Hocko <mhocko@suse.com> Date: Fri, 16 Nov 2018 15:08:15 -0800 Subject: [PATCH 426/443] mm, memory_hotplug: check zone_movable in has_unmovable_pages Page state checks are racy. Under a heavy memory workload (e.g. stress -m 200 -t 2h) it is quite easy to hit a race window when the page is allocated but its state is not fully populated yet. A debugging patch to dump the struct page state shows has_unmovable_pages: pfn:0x10dfec00, found:0x1, count:0x0 page:ffffea0437fb0000 count:1 mapcount:1 mapping:ffff880e05239841 index:0x7f26e5000 compound_mapcount: 1 flags: 0x5fffffc0090034(uptodate|lru|active|head|swapbacked) Note that the state has been checked for both PageLRU and PageSwapBacked already. Closing this race completely would require some sort of retry logic. This can be tricky and error prone (think of potential endless or long taking loops). Workaround this problem for movable zones at least. Such a zone should only contain movable pages. Commit 15c30bc09085 ("mm, memory_hotplug: make has_unmovable_pages more robust") has told us that this is not strictly true though. Bootmem pages should be marked reserved though so we can move the original check after the PageReserved check. Pages from other zones are still prone to races but we even do not pretend that memory hotremove works for those so pre-mature failure doesn't hurt that much. Link: http://lkml.kernel.org/r/20181106095524.14629-1-mhocko@kernel.org Fixes: 15c30bc09085 ("mm, memory_hotplug: make has_unmovable_pages more robust") Signed-off-by: Michal Hocko <mhocko@suse.com> Reported-by: Baoquan He <bhe@redhat.com> Tested-by: Baoquan He <bhe@redhat.com> Acked-by: Baoquan He <bhe@redhat.com> Reviewed-by: Oscar Salvador <osalvador@suse.de> Acked-by: Balbir Singh <bsingharora@gmail.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org> --- mm/page_alloc.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/mm/page_alloc.c b/mm/page_alloc.c index a919ba5cb3c84..a0e5ec0addd24 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -7788,6 +7788,14 @@ bool has_unmovable_pages(struct zone *zone, struct page *page, int count, if (PageReserved(page)) goto unmovable; + /* + * If the zone is movable and we have ruled out all reserved + * pages then it should be reasonably safe to assume the rest + * is movable. + */ + if (zone_idx(zone) == ZONE_MOVABLE) + continue; + /* * Hugepages are not in LRU lists, but they're movable. * We need not scan over tail pages bacause we don't From a76cf1a474d7dbcd9336b5f5afb0162baa142cf0 Mon Sep 17 00:00:00 2001 From: Roman Gushchin <guro@fb.com> Date: Fri, 16 Nov 2018 15:08:18 -0800 Subject: [PATCH 427/443] mm: don't reclaim inodes with many attached pages Spock reported that commit 172b06c32b94 ("mm: slowly shrink slabs with a relatively small number of objects") leads to a regression on his setup: periodically the majority of the pagecache is evicted without an obvious reason, while before the change the amount of free memory was balancing around the watermark. The reason behind is that the mentioned above change created some minimal background pressure on the inode cache. The problem is that if an inode is considered to be reclaimed, all belonging pagecache page are stripped, no matter how many of them are there. So, if a huge multi-gigabyte file is cached in the memory, and the goal is to reclaim only few slab objects (unused inodes), we still can eventually evict all gigabytes of the pagecache at once. The workload described by Spock has few large non-mapped files in the pagecache, so it's especially noticeable. To solve the problem let's postpone the reclaim of inodes, which have more than 1 attached page. Let's wait until the pagecache pages will be evicted naturally by scanning the corresponding LRU lists, and only then reclaim the inode structure. Link: http://lkml.kernel.org/r/20181023164302.20436-1-guro@fb.com Signed-off-by: Roman Gushchin <guro@fb.com> Reported-by: Spock <dairinin@gmail.com> Tested-by: Spock <dairinin@gmail.com> Reviewed-by: Andrew Morton <akpm@linux-foundation.org> Cc: Michal Hocko <mhocko@kernel.org> Cc: Rik van Riel <riel@surriel.com> Cc: Randy Dunlap <rdunlap@infradead.org> Cc: <stable@vger.kernel.org> [4.19.x] Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org> --- fs/inode.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/fs/inode.c b/fs/inode.c index 9e198f00b64c6..35d2108d567c2 100644 --- a/fs/inode.c +++ b/fs/inode.c @@ -730,8 +730,11 @@ static enum lru_status inode_lru_isolate(struct list_head *item, return LRU_REMOVED; } - /* recently referenced inodes get one more pass */ - if (inode->i_state & I_REFERENCED) { + /* + * Recently referenced inodes and inodes with many attached pages + * get one more pass. + */ + if (inode->i_state & I_REFERENCED || inode->i_data.nrpages > 1) { inode->i_state &= ~I_REFERENCED; spin_unlock(&inode->i_lock); return LRU_ROTATE; From f5f67cc0e0d3d17ee046ba83f831f267767b8554 Mon Sep 17 00:00:00 2001 From: Randy Dunlap <rdunlap@infradead.org> Date: Fri, 16 Nov 2018 15:08:22 -0800 Subject: [PATCH 428/443] scripts/faddr2line: fix location of start_kernel in comment Fix a source file reference location to the correct path name. Link: http://lkml.kernel.org/r/1d50bd3d-178e-dcd8-779f-9711887440eb@infradead.org Signed-off-by: Randy Dunlap <rdunlap@infradead.org> Acked-by: Josh Poimboeuf <jpoimboe@redhat.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org> --- scripts/faddr2line | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/faddr2line b/scripts/faddr2line index a0149db00be75..6c6439f69a725 100755 --- a/scripts/faddr2line +++ b/scripts/faddr2line @@ -71,7 +71,7 @@ die() { # Try to figure out the source directory prefix so we can remove it from the # addr2line output. HACK ALERT: This assumes that start_kernel() is in -# kernel/init.c! This only works for vmlinux. Otherwise it falls back to +# init/main.c! This only works for vmlinux. Otherwise it falls back to # printing the absolute path. find_dir_prefix() { local objfile=$1 From 5040f8df56fb90c7919f1c9b0b6e54c843437456 Mon Sep 17 00:00:00 2001 From: Wengang Wang <wen.gang.wang@oracle.com> Date: Fri, 16 Nov 2018 15:08:25 -0800 Subject: [PATCH 429/443] ocfs2: free up write context when direct IO failed The write context should also be freed even when direct IO failed. Otherwise a memory leak is introduced and entries remain in oi->ip_unwritten_list causing the following BUG later in unlink path: ERROR: bug expression: !list_empty(&oi->ip_unwritten_list) ERROR: Clear inode of 215043, inode has unwritten extents ... Call Trace: ? __set_current_blocked+0x42/0x68 ocfs2_evict_inode+0x91/0x6a0 [ocfs2] ? bit_waitqueue+0x40/0x33 evict+0xdb/0x1af iput+0x1a2/0x1f7 do_unlinkat+0x194/0x28f SyS_unlinkat+0x1b/0x2f do_syscall_64+0x79/0x1ae entry_SYSCALL_64_after_hwframe+0x151/0x0 This patch also logs, with frequency limit, direct IO failures. Link: http://lkml.kernel.org/r/20181102170632.25921-1-wen.gang.wang@oracle.com Signed-off-by: Wengang Wang <wen.gang.wang@oracle.com> Reviewed-by: Junxiao Bi <junxiao.bi@oracle.com> Reviewed-by: Changwei Ge <ge.changwei@h3c.com> Reviewed-by: Joseph Qi <jiangqi903@gmail.com> Cc: Mark Fasheh <mark@fasheh.com> Cc: Joel Becker <jlbec@evilplan.org> Cc: <stable@vger.kernel.org> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org> --- fs/ocfs2/aops.c | 12 ++++++++++-- fs/ocfs2/cluster/masklog.h | 9 +++++++++ 2 files changed, 19 insertions(+), 2 deletions(-) diff --git a/fs/ocfs2/aops.c b/fs/ocfs2/aops.c index da578ad4c08f4..eb1ce30412dc3 100644 --- a/fs/ocfs2/aops.c +++ b/fs/ocfs2/aops.c @@ -2411,8 +2411,16 @@ static int ocfs2_dio_end_io(struct kiocb *iocb, /* this io's submitter should not have unlocked this before we could */ BUG_ON(!ocfs2_iocb_is_rw_locked(iocb)); - if (bytes > 0 && private) - ret = ocfs2_dio_end_io_write(inode, private, offset, bytes); + if (bytes <= 0) + mlog_ratelimited(ML_ERROR, "Direct IO failed, bytes = %lld", + (long long)bytes); + if (private) { + if (bytes > 0) + ret = ocfs2_dio_end_io_write(inode, private, offset, + bytes); + else + ocfs2_dio_free_write_ctx(inode, private); + } ocfs2_iocb_clear_rw_locked(iocb); diff --git a/fs/ocfs2/cluster/masklog.h b/fs/ocfs2/cluster/masklog.h index 308ea0eb35fd1..a396096a5099f 100644 --- a/fs/ocfs2/cluster/masklog.h +++ b/fs/ocfs2/cluster/masklog.h @@ -178,6 +178,15 @@ do { \ ##__VA_ARGS__); \ } while (0) +#define mlog_ratelimited(mask, fmt, ...) \ +do { \ + static DEFINE_RATELIMIT_STATE(_rs, \ + DEFAULT_RATELIMIT_INTERVAL, \ + DEFAULT_RATELIMIT_BURST); \ + if (__ratelimit(&_rs)) \ + mlog(mask, fmt, ##__VA_ARGS__); \ +} while (0) + #define mlog_errno(st) ({ \ int _st = (st); \ if (_st != -ERESTARTSYS && _st != -EINTR && \ From 78179556e7605ba07fd3ddba6ab8891fa457b93e Mon Sep 17 00:00:00 2001 From: Mike Rapoport <rppt@linux.ibm.com> Date: Fri, 16 Nov 2018 15:08:29 -0800 Subject: [PATCH 430/443] mm/gup.c: fix follow_page_mask() kerneldoc comment Commit df06b37ffe5a ("mm/gup: cache dev_pagemap while pinning pages") modified the signature of follow_page_mask() but left the parameter description behind. Update the description to make the code and comments agree again. While at it, update formatting of the return value description to match Documentation/doc-guide/kernel-doc.rst guidelines. Link: http://lkml.kernel.org/r/1541603316-27832-1-git-send-email-rppt@linux.ibm.com Signed-off-by: Mike Rapoport <rppt@linux.ibm.com> Reviewed-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org> --- mm/gup.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/mm/gup.c b/mm/gup.c index f76e77a2d34b7..aa43620a3270e 100644 --- a/mm/gup.c +++ b/mm/gup.c @@ -385,11 +385,17 @@ static struct page *follow_p4d_mask(struct vm_area_struct *vma, * @vma: vm_area_struct mapping @address * @address: virtual address to look up * @flags: flags modifying lookup behaviour - * @page_mask: on output, *page_mask is set according to the size of the page + * @ctx: contains dev_pagemap for %ZONE_DEVICE memory pinning and a + * pointer to output page_mask * * @flags can have FOLL_ flags set, defined in <linux/mm.h> * - * Returns the mapped (struct page *), %NULL if no mapping exists, or + * When getting pages from ZONE_DEVICE memory, the @ctx->pgmap caches + * the device's dev_pagemap metadata to avoid repeating expensive lookups. + * + * On output, the @ctx->page_mask is set according to the size of the page. + * + * Return: the mapped (struct page *), %NULL if no mapping exists, or * an error pointer if there is a mapping to something not represented * by a page descriptor (see also vm_normal_page()). */ From 13c9aaf7fa01cc7600c61981609feadeef3354ec Mon Sep 17 00:00:00 2001 From: Janne Huttunen <janne.huttunen@nokia.com> Date: Fri, 16 Nov 2018 15:08:32 -0800 Subject: [PATCH 431/443] mm/vmstat.c: fix NUMA statistics updates Scan through the whole array to see if an update is needed. While we're at it, use sizeof() to be safe against any possible type changes in the future. The bug here is that we wouldn't sync per-cpu counters into global ones if there was an update of numa_stats for higher cpus. Highly theoretical one though because it is much more probable that zone_stats are updated so we would refresh anyway. So I wouldn't bother to mark this for stable, yet something nice to fix. [mhocko@suse.com: changelog enhancement] Link: http://lkml.kernel.org/r/1541601517-17282-1-git-send-email-janne.huttunen@nokia.com Fixes: 1d90ca897cb0 ("mm: update NUMA counter threshold size") Signed-off-by: Janne Huttunen <janne.huttunen@nokia.com> Acked-by: Michal Hocko <mhocko@suse.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org> --- mm/vmstat.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/mm/vmstat.c b/mm/vmstat.c index 6038ce593ce3e..9c624595e9041 100644 --- a/mm/vmstat.c +++ b/mm/vmstat.c @@ -1827,12 +1827,13 @@ static bool need_update(int cpu) /* * The fast way of checking if there are any vmstat diffs. - * This works because the diffs are byte sized items. */ - if (memchr_inv(p->vm_stat_diff, 0, NR_VM_ZONE_STAT_ITEMS)) + if (memchr_inv(p->vm_stat_diff, 0, NR_VM_ZONE_STAT_ITEMS * + sizeof(p->vm_stat_diff[0]))) return true; #ifdef CONFIG_NUMA - if (memchr_inv(p->vm_numa_stat_diff, 0, NR_VM_NUMA_STAT_ITEMS)) + if (memchr_inv(p->vm_numa_stat_diff, 0, NR_VM_NUMA_STAT_ITEMS * + sizeof(p->vm_numa_stat_diff[0]))) return true; #endif } From 1c23b4108d716cc848b38532063a8aca4f86add8 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann <arnd@arndb.de> Date: Fri, 16 Nov 2018 15:08:35 -0800 Subject: [PATCH 432/443] lib/ubsan.c: don't mark __ubsan_handle_builtin_unreachable as noreturn gcc-8 complains about the prototype for this function: lib/ubsan.c:432:1: error: ignoring attribute 'noreturn' in declaration of a built-in function '__ubsan_handle_builtin_unreachable' because it conflicts with attribute 'const' [-Werror=attributes] This is actually a GCC's bug. In GCC internals __ubsan_handle_builtin_unreachable() declared with both 'noreturn' and 'const' attributes instead of only 'noreturn': https://gcc.gnu.org/bugzilla/show_bug.cgi?id=84210 Workaround this by removing the noreturn attribute. [aryabinin: add information about GCC bug in changelog] Link: http://lkml.kernel.org/r/20181107144516.4587-1-aryabinin@virtuozzo.com Signed-off-by: Arnd Bergmann <arnd@arndb.de> Signed-off-by: Andrey Ryabinin <aryabinin@virtuozzo.com> Acked-by: Olof Johansson <olof@lixom.net> Cc: <stable@vger.kernel.org> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org> --- lib/ubsan.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/lib/ubsan.c b/lib/ubsan.c index 59fee96c29a0f..e4162f59a81cc 100644 --- a/lib/ubsan.c +++ b/lib/ubsan.c @@ -427,8 +427,7 @@ void __ubsan_handle_shift_out_of_bounds(struct shift_out_of_bounds_data *data, EXPORT_SYMBOL(__ubsan_handle_shift_out_of_bounds); -void __noreturn -__ubsan_handle_builtin_unreachable(struct unreachable_data *data) +void __ubsan_handle_builtin_unreachable(struct unreachable_data *data) { unsigned long flags; From 1a413646931cb14442065cfc17561e50f5b5bb44 Mon Sep 17 00:00:00 2001 From: Yufen Yu <yuyufen@huawei.com> Date: Fri, 16 Nov 2018 15:08:39 -0800 Subject: [PATCH 433/443] tmpfs: make lseek(SEEK_DATA/SEK_HOLE) return ENXIO with a negative offset Other filesystems such as ext4, f2fs and ubifs all return ENXIO when lseek (SEEK_DATA or SEEK_HOLE) requests a negative offset. man 2 lseek says : EINVAL whence is not valid. Or: the resulting file offset would be : negative, or beyond the end of a seekable device. : : ENXIO whence is SEEK_DATA or SEEK_HOLE, and the file offset is beyond : the end of the file. Make tmpfs return ENXIO under these circumstances as well. After this, tmpfs also passes xfstests's generic/448. [akpm@linux-foundation.org: rewrite changelog] Link: http://lkml.kernel.org/r/1540434176-14349-1-git-send-email-yuyufen@huawei.com Signed-off-by: Yufen Yu <yuyufen@huawei.com> Reviewed-by: Andrew Morton <akpm@linux-foundation.org> Cc: Al Viro <viro@zeniv.linux.org.uk> Cc: Hugh Dickins <hughd@google.com> Cc: William Kucharski <william.kucharski@oracle.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org> --- mm/shmem.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/mm/shmem.c b/mm/shmem.c index ea26d7a0342d7..d44991ea5ed4b 100644 --- a/mm/shmem.c +++ b/mm/shmem.c @@ -2563,9 +2563,7 @@ static loff_t shmem_file_llseek(struct file *file, loff_t offset, int whence) inode_lock(inode); /* We're holding i_mutex so we can access i_size directly */ - if (offset < 0) - offset = -EINVAL; - else if (offset >= inode->i_size) + if (offset < 0 || offset >= inode->i_size) offset = -ENXIO; else { start = offset >> PAGE_SHIFT; From 6f4d29df66acd49303a99025046b85cabe7aa17a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= <u.kleine-koenig@pengutronix.de> Date: Fri, 16 Nov 2018 15:08:43 -0800 Subject: [PATCH 434/443] scripts/spdxcheck.py: make python3 compliant MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Without this change the following happens when using Python3 (3.6.6): $ echo "GPL-2.0" | python3 scripts/spdxcheck.py - FAIL: 'str' object has no attribute 'decode' Traceback (most recent call last): File "scripts/spdxcheck.py", line 253, in <module> parser.parse_lines(sys.stdin, args.maxlines, '-') File "scripts/spdxcheck.py", line 171, in parse_lines line = line.decode(locale.getpreferredencoding(False), errors='ignore') AttributeError: 'str' object has no attribute 'decode' So as the line is already a string, there is no need to decode it and the line can be dropped. /usr/bin/python on Arch is Python 3. So this would indeed be worth going into 4.19. Link: http://lkml.kernel.org/r/20181023070802.22558-1-u.kleine-koenig@pengutronix.de Signed-off-by: Uwe Kleine-König <u.kleine-koenig@pengutronix.de> Cc: Thomas Gleixner <tglx@linutronix.de> Cc: Joe Perches <joe@perches.com> Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org> Cc: <stable@vger.kernel.org> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org> --- scripts/spdxcheck.py | 1 - 1 file changed, 1 deletion(-) diff --git a/scripts/spdxcheck.py b/scripts/spdxcheck.py index 839e190bbd7a0..5056fb3b897d0 100755 --- a/scripts/spdxcheck.py +++ b/scripts/spdxcheck.py @@ -168,7 +168,6 @@ def parse_lines(self, fd, maxlines, fname): self.curline = 0 try: for line in fd: - line = line.decode(locale.getpreferredencoding(False), errors='ignore') self.curline += 1 if self.curline > maxlines: break From c63ae43ba53bc432b414fd73dd5f4b01fcb1ab43 Mon Sep 17 00:00:00 2001 From: Michal Hocko <mhocko@suse.com> Date: Fri, 16 Nov 2018 15:08:53 -0800 Subject: [PATCH 435/443] mm, page_alloc: check for max order in hot path Konstantin has noticed that kvmalloc might trigger the following warning: WARNING: CPU: 0 PID: 6676 at mm/vmstat.c:986 __fragmentation_index+0x54/0x60 [...] Call Trace: fragmentation_index+0x76/0x90 compaction_suitable+0x4f/0xf0 shrink_node+0x295/0x310 node_reclaim+0x205/0x250 get_page_from_freelist+0x649/0xad0 __alloc_pages_nodemask+0x12a/0x2a0 kmalloc_large_node+0x47/0x90 __kmalloc_node+0x22b/0x2e0 kvmalloc_node+0x3e/0x70 xt_alloc_table_info+0x3a/0x80 [x_tables] do_ip6t_set_ctl+0xcd/0x1c0 [ip6_tables] nf_setsockopt+0x44/0x60 SyS_setsockopt+0x6f/0xc0 do_syscall_64+0x67/0x120 entry_SYSCALL_64_after_hwframe+0x3d/0xa2 the problem is that we only check for an out of bound order in the slow path and the node reclaim might happen from the fast path already. This is fixable by making sure that kvmalloc doesn't ever use kmalloc for requests that are larger than KMALLOC_MAX_SIZE but this also shows that the code is rather fragile. A recent UBSAN report just underlines that by the following report UBSAN: Undefined behaviour in mm/page_alloc.c:3117:19 shift exponent 51 is too large for 32-bit type 'int' CPU: 0 PID: 6520 Comm: syz-executor1 Not tainted 4.19.0-rc2 #1 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Bochs 01/01/2011 Call Trace: __dump_stack lib/dump_stack.c:77 [inline] dump_stack+0xd2/0x148 lib/dump_stack.c:113 ubsan_epilogue+0x12/0x94 lib/ubsan.c:159 __ubsan_handle_shift_out_of_bounds+0x2b6/0x30b lib/ubsan.c:425 __zone_watermark_ok+0x2c7/0x400 mm/page_alloc.c:3117 zone_watermark_fast mm/page_alloc.c:3216 [inline] get_page_from_freelist+0xc49/0x44c0 mm/page_alloc.c:3300 __alloc_pages_nodemask+0x21e/0x640 mm/page_alloc.c:4370 alloc_pages_current+0xcc/0x210 mm/mempolicy.c:2093 alloc_pages include/linux/gfp.h:509 [inline] __get_free_pages+0x12/0x60 mm/page_alloc.c:4414 dma_mem_alloc+0x36/0x50 arch/x86/include/asm/floppy.h:156 raw_cmd_copyin drivers/block/floppy.c:3159 [inline] raw_cmd_ioctl drivers/block/floppy.c:3206 [inline] fd_locked_ioctl+0xa00/0x2c10 drivers/block/floppy.c:3544 fd_ioctl+0x40/0x60 drivers/block/floppy.c:3571 __blkdev_driver_ioctl block/ioctl.c:303 [inline] blkdev_ioctl+0xb3c/0x1a30 block/ioctl.c:601 block_ioctl+0x105/0x150 fs/block_dev.c:1883 vfs_ioctl fs/ioctl.c:46 [inline] do_vfs_ioctl+0x1c0/0x1150 fs/ioctl.c:687 ksys_ioctl+0x9e/0xb0 fs/ioctl.c:702 __do_sys_ioctl fs/ioctl.c:709 [inline] __se_sys_ioctl fs/ioctl.c:707 [inline] __x64_sys_ioctl+0x7e/0xc0 fs/ioctl.c:707 do_syscall_64+0xc4/0x510 arch/x86/entry/common.c:290 entry_SYSCALL_64_after_hwframe+0x49/0xbe Note that this is not a kvmalloc path. It is just that the fast path really depends on having sanitzed order as well. Therefore move the order check to the fast path. Link: http://lkml.kernel.org/r/20181113094305.GM15120@dhcp22.suse.cz Signed-off-by: Michal Hocko <mhocko@suse.com> Reported-by: Konstantin Khlebnikov <khlebnikov@yandex-team.ru> Reported-by: Kyungtae Kim <kt0755@gmail.com> Acked-by: Vlastimil Babka <vbabka@suse.cz> Cc: Balbir Singh <bsingharora@gmail.com> Cc: Mel Gorman <mgorman@techsingularity.net> Cc: Pavel Tatashin <pavel.tatashin@microsoft.com> Cc: Oscar Salvador <osalvador@suse.de> Cc: Mike Rapoport <rppt@linux.vnet.ibm.com> Cc: Aaron Lu <aaron.lu@intel.com> Cc: Joonsoo Kim <iamjoonsoo.kim@lge.com> Cc: Byoungyoung Lee <lifeasageek@gmail.com> Cc: "Dae R. Jeong" <threeearcat@gmail.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org> --- mm/page_alloc.c | 20 +++++++++----------- 1 file changed, 9 insertions(+), 11 deletions(-) diff --git a/mm/page_alloc.c b/mm/page_alloc.c index a0e5ec0addd24..6847177dc4a1a 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -4060,17 +4060,6 @@ __alloc_pages_slowpath(gfp_t gfp_mask, unsigned int order, unsigned int cpuset_mems_cookie; int reserve_flags; - /* - * In the slowpath, we sanity check order to avoid ever trying to - * reclaim >= MAX_ORDER areas which will never succeed. Callers may - * be using allocators in order of preference for an area that is - * too large. - */ - if (order >= MAX_ORDER) { - WARN_ON_ONCE(!(gfp_mask & __GFP_NOWARN)); - return NULL; - } - /* * We also sanity check to catch abuse of atomic reserves being used by * callers that are not in atomic context. @@ -4364,6 +4353,15 @@ __alloc_pages_nodemask(gfp_t gfp_mask, unsigned int order, int preferred_nid, gfp_t alloc_mask; /* The gfp_t that was actually used for allocation */ struct alloc_context ac = { }; + /* + * There are several places where we assume that the order value is sane + * so bail out early if the request is out of bound. + */ + if (unlikely(order >= MAX_ORDER)) { + WARN_ON_ONCE(!(gfp_mask & __GFP_NOWARN)); + return NULL; + } + gfp_mask &= gfp_allowed_mask; alloc_mask = gfp_mask; if (!prepare_alloc_pages(gfp_mask, order, preferred_nid, nodemask, &ac, &alloc_mask, &alloc_flags)) From 45e79815b89149dc6698e71b587c86ffaf4062aa Mon Sep 17 00:00:00 2001 From: Chen Chang <rainccrun@gmail.com> Date: Fri, 16 Nov 2018 15:08:57 -0800 Subject: [PATCH 436/443] mm/memblock.c: fix a typo in __next_mem_pfn_range() comments Link: http://lkml.kernel.org/r/20181107100247.13359-1-rainccrun@gmail.com Signed-off-by: Chen Chang <rainccrun@gmail.com> Acked-by: Michal Hocko <mhocko@suse.com> Acked-by: Mike Rapoport <rppt@linux.ibm.com> Reviewed-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org> --- mm/memblock.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mm/memblock.c b/mm/memblock.c index 7df468c8ebc8c..9a2d5ae81ae1c 100644 --- a/mm/memblock.c +++ b/mm/memblock.c @@ -1179,7 +1179,7 @@ void __init_memblock __next_mem_range_rev(u64 *idx, int nid, #ifdef CONFIG_HAVE_MEMBLOCK_NODE_MAP /* - * Common iterator interface used to define for_each_mem_range(). + * Common iterator interface used to define for_each_mem_pfn_range(). */ void __init_memblock __next_mem_pfn_range(int *idx, int nid, unsigned long *out_start_pfn, From 9ff01193a20d391e8dbce4403dd5ef87c7eaaca6 Mon Sep 17 00:00:00 2001 From: Linus Torvalds <torvalds@linux-foundation.org> Date: Sun, 18 Nov 2018 13:33:44 -0800 Subject: [PATCH 437/443] Linux 4.20-rc3 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 2f36db8978953..ddbf627cad8f5 100644 --- a/Makefile +++ b/Makefile @@ -2,7 +2,7 @@ VERSION = 4 PATCHLEVEL = 20 SUBLEVEL = 0 -EXTRAVERSION = -rc2 +EXTRAVERSION = -rc3 NAME = "People's Front" # *DOCUMENTATION* From faa2541f5b1afa8b6d777a73bc2f27d5c8c98695 Mon Sep 17 00:00:00 2001 From: Takashi Iwai <tiwai@suse.de> Date: Mon, 26 Nov 2018 17:47:44 +0100 Subject: [PATCH 438/443] leds: trigger: Introduce audio mute LED trigger MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This patch adds a new LED trigger for coupling the audio mixer change with the LED on laptops or other devices. Currently there are two trigger types, "audio-mute" and "audio-micmute". The audio driver triggers the LED brightness change via ledtrig_audio_set() call with the proper type (either mute or mic-mute). OTOH, the consumers may call ledtrig_audio_get() for the initial brightness value that may have been set by the audio driver beforehand. This new stuff will be used by HD-audio codec driver and some platform drivers (thinkpad_acpi and dell-laptop, also upcoming huawei-wmi). Acked-by: Jacek Anaszewski <jacek.anaszewski@gmail.com> Acked-by: Pavel Machek <pavel@ucw.cz> Acked-by: Pali Rohár <pali.rohar@gmail.com> Signed-off-by: Takashi Iwai <tiwai@suse.de> --- drivers/leds/trigger/Kconfig | 7 +++++ drivers/leds/trigger/Makefile | 1 + drivers/leds/trigger/ledtrig-audio.c | 44 ++++++++++++++++++++++++++++ include/linux/leds.h | 20 +++++++++++++ 4 files changed, 72 insertions(+) create mode 100644 drivers/leds/trigger/ledtrig-audio.c diff --git a/drivers/leds/trigger/Kconfig b/drivers/leds/trigger/Kconfig index b76fc3cdc8f8d..23cc85e2e0e57 100644 --- a/drivers/leds/trigger/Kconfig +++ b/drivers/leds/trigger/Kconfig @@ -136,4 +136,11 @@ config LEDS_TRIGGER_PATTERN which is a series of tuples, of brightness and duration (ms). If unsure, say N +config LEDS_TRIGGER_AUDIO + tristate "Audio Mute LED Trigger" + help + This allows LEDs to be controlled by audio drivers for following + the audio mute and mic-mute changes. + If unsure, say N + endif # LEDS_TRIGGERS diff --git a/drivers/leds/trigger/Makefile b/drivers/leds/trigger/Makefile index 9bcb64ee81231..733a83e2a7183 100644 --- a/drivers/leds/trigger/Makefile +++ b/drivers/leds/trigger/Makefile @@ -14,3 +14,4 @@ obj-$(CONFIG_LEDS_TRIGGER_CAMERA) += ledtrig-camera.o obj-$(CONFIG_LEDS_TRIGGER_PANIC) += ledtrig-panic.o obj-$(CONFIG_LEDS_TRIGGER_NETDEV) += ledtrig-netdev.o obj-$(CONFIG_LEDS_TRIGGER_PATTERN) += ledtrig-pattern.o +obj-$(CONFIG_LEDS_TRIGGER_AUDIO) += ledtrig-audio.o diff --git a/drivers/leds/trigger/ledtrig-audio.c b/drivers/leds/trigger/ledtrig-audio.c new file mode 100644 index 0000000000000..f76621e88482d --- /dev/null +++ b/drivers/leds/trigger/ledtrig-audio.c @@ -0,0 +1,44 @@ +// SPDX-License-Identifier: GPL-2.0 +// +// Audio Mute LED trigger +// + +#include <linux/kernel.h> +#include <linux/leds.h> +#include <linux/module.h> + +static struct led_trigger *ledtrig_audio[NUM_AUDIO_LEDS]; +static enum led_brightness audio_state[NUM_AUDIO_LEDS]; + +enum led_brightness ledtrig_audio_get(enum led_audio type) +{ + return audio_state[type]; +} +EXPORT_SYMBOL_GPL(ledtrig_audio_get); + +void ledtrig_audio_set(enum led_audio type, enum led_brightness state) +{ + audio_state[type] = state; + led_trigger_event(ledtrig_audio[type], state); +} +EXPORT_SYMBOL_GPL(ledtrig_audio_set); + +static int __init ledtrig_audio_init(void) +{ + led_trigger_register_simple("audio-mute", + &ledtrig_audio[LED_AUDIO_MUTE]); + led_trigger_register_simple("audio-micmute", + &ledtrig_audio[LED_AUDIO_MICMUTE]); + return 0; +} +module_init(ledtrig_audio_init); + +static void __exit ledtrig_audio_exit(void) +{ + led_trigger_unregister_simple(ledtrig_audio[LED_AUDIO_MUTE]); + led_trigger_unregister_simple(ledtrig_audio[LED_AUDIO_MICMUTE]); +} +module_exit(ledtrig_audio_exit); + +MODULE_DESCRIPTION("LED trigger for audio mute control"); +MODULE_LICENSE("GPL v2"); diff --git a/include/linux/leds.h b/include/linux/leds.h index 7393a316d9fad..580cbaef789a0 100644 --- a/include/linux/leds.h +++ b/include/linux/leds.h @@ -487,4 +487,24 @@ struct led_pattern { int brightness; }; +enum led_audio { + LED_AUDIO_MUTE, /* master mute LED */ + LED_AUDIO_MICMUTE, /* mic mute LED */ + NUM_AUDIO_LEDS +}; + +#if IS_ENABLED(CONFIG_LEDS_TRIGGER_AUDIO) +enum led_brightness ledtrig_audio_get(enum led_audio type); +void ledtrig_audio_set(enum led_audio type, enum led_brightness state); +#else +static inline enum led_brightness ledtrig_audio_get(enum led_audio type) +{ + return LED_OFF; +} +static inline void ledtrig_audio_set(enum led_audio type, + enum led_brightness state) +{ +} +#endif + #endif /* __LINUX_LEDS_H_INCLUDED */ From d00fa46e0a2c670d980af6e9e81b41ae3f9f02b5 Mon Sep 17 00:00:00 2001 From: Takashi Iwai <tiwai@suse.de> Date: Mon, 26 Nov 2018 17:47:45 +0100 Subject: [PATCH 439/443] platform/x86: dell-laptop: Add micmute LED trigger support MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This patch adds the LED trigger support for audio mic-mute control. As of this patch, the LED device isn't tied with the audio driver, and can be changed via user-space at "platform::micmute" sysfs entry. (This new prefix "platform" is the agreement among people for indicating the generic laptop / system-wide attribute.) The binding with HD-audio is still done via the existing exported dell_micmute_led_set(). It will be replaced with the LED trigger binding in later patches. Also this selects CONFIG_LEDS_TRIGGERS and CONFIG_LEDS_TRIGGERS_AUDIO unconditionally. Strictly speaking, these aren't 100% mandatory, but leaving these manual selections would lead to a functional regression easily once after converting from the dynamic symbol binding to the LEDs trigger in a later patch. Acked-by: Jacek Anaszewski <jacek.anaszewski@gmail.com> Acked-by: Pavel Machek <pavel@ucw.cz> Acked-by: Andy Shevchenko <andy.shevchenko@gmail.com> Acked-by: Pali Rohár <pali.rohar@gmail.com> Signed-off-by: Takashi Iwai <tiwai@suse.de> --- drivers/platform/x86/Kconfig | 2 ++ drivers/platform/x86/dell-laptop.c | 25 +++++++++++++++++++++++++ 2 files changed, 27 insertions(+) diff --git a/drivers/platform/x86/Kconfig b/drivers/platform/x86/Kconfig index 54f6a40c75c69..9b9cc3cc33e94 100644 --- a/drivers/platform/x86/Kconfig +++ b/drivers/platform/x86/Kconfig @@ -177,6 +177,8 @@ config DELL_LAPTOP select POWER_SUPPLY select LEDS_CLASS select NEW_LEDS + select LEDS_TRIGGERS + select LEDS_TRIGGER_AUDIO ---help--- This driver adds support for rfkill and backlight control to Dell laptops (except for some models covered by the Compal driver). diff --git a/drivers/platform/x86/dell-laptop.c b/drivers/platform/x86/dell-laptop.c index 06978c14c83b2..0db2dbf7b0d1c 100644 --- a/drivers/platform/x86/dell-laptop.c +++ b/drivers/platform/x86/dell-laptop.c @@ -2131,6 +2131,23 @@ int dell_micmute_led_set(int state) } EXPORT_SYMBOL_GPL(dell_micmute_led_set); +static int micmute_led_set(struct led_classdev *led_cdev, + enum led_brightness brightness) +{ + int state = brightness != LED_OFF; + int err; + + err = dell_micmute_led_set(state); + return err < 0 ? err : 0; +} + +static struct led_classdev micmute_led_cdev = { + .name = "platform::micmute", + .max_brightness = 1, + .brightness_set_blocking = micmute_led_set, + .default_trigger = "audio-micmute", +}; + static int __init dell_init(void) { struct calling_interface_token *token; @@ -2175,6 +2192,11 @@ static int __init dell_init(void) dell_laptop_register_notifier(&dell_laptop_notifier); + micmute_led_cdev.brightness = ledtrig_audio_get(LED_AUDIO_MICMUTE); + ret = led_classdev_register(&platform_device->dev, &micmute_led_cdev); + if (ret < 0) + goto fail_led; + if (acpi_video_get_backlight_type() != acpi_backlight_vendor) return 0; @@ -2220,6 +2242,8 @@ static int __init dell_init(void) fail_get_brightness: backlight_device_unregister(dell_backlight_device); fail_backlight: + led_classdev_unregister(&micmute_led_cdev); +fail_led: dell_cleanup_rfkill(); fail_rfkill: platform_device_del(platform_device); @@ -2239,6 +2263,7 @@ static void __exit dell_exit(void) touchpad_led_exit(); kbd_led_exit(); backlight_device_unregister(dell_backlight_device); + led_classdev_unregister(&micmute_led_cdev); dell_cleanup_rfkill(); if (platform_device) { platform_device_unregister(platform_device); From 4e1d092445a58b0a58d2743448a7aa17ae1cb0ef Mon Sep 17 00:00:00 2001 From: Takashi Iwai <tiwai@suse.de> Date: Mon, 26 Nov 2018 17:47:45 +0100 Subject: [PATCH 440/443] platform/x86: thinkpad_acpi: Add audio mute LED classdev support MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In the upcoming change, the binding of audio mute / mic-mute LED controls will be switched with LED trigger. This patch is the last piece of preparation: adding the audio mute / mic-mute LED class devices to thinkpad_acpi driver. Two devices, platform::mute and platform::micmute, will be added for controlling the mute LED and mic-mute LED, respectively. The new prefix "platform" is the suggestion by upstream for indicating the generic laptop attribute. Also this selects CONFIG_LEDS_TRIGGERS and CONFIG_LEDS_TRIGGERS_AUDIO unconditionally. Strictly speaking, these aren't 100% mandatory, but leaving these manual selections would lead to a functional regression easily once after converting from the dynamic symbol binding to the LEDs trigger in a later patch. Acked-by: Jacek Anaszewski <jacek.anaszewski@gmail.com> Acked-by: Pavel Machek <pavel@ucw.cz> Acked-by: Andy Shevchenko <andy.shevchenko@gmail.com> Acked-by: Henrique de Moraes Holschuh <hmh@hmh.eng.br> Acked-by: Pali Rohár <pali.rohar@gmail.com> Signed-off-by: Takashi Iwai <tiwai@suse.de> --- drivers/platform/x86/Kconfig | 2 + drivers/platform/x86/thinkpad_acpi.c | 57 +++++++++++++++++++++++++--- 2 files changed, 53 insertions(+), 6 deletions(-) diff --git a/drivers/platform/x86/Kconfig b/drivers/platform/x86/Kconfig index 9b9cc3cc33e94..87f70e8f4dd00 100644 --- a/drivers/platform/x86/Kconfig +++ b/drivers/platform/x86/Kconfig @@ -495,6 +495,8 @@ config THINKPAD_ACPI select NVRAM select NEW_LEDS select LEDS_CLASS + select LEDS_TRIGGERS + select LEDS_TRIGGER_AUDIO ---help--- This is a driver for the IBM and Lenovo ThinkPad laptops. It adds support for Fn-Fx key combinations, Bluetooth control, video diff --git a/drivers/platform/x86/thinkpad_acpi.c b/drivers/platform/x86/thinkpad_acpi.c index fde08a997557f..3d2c1f5f22e20 100644 --- a/drivers/platform/x86/thinkpad_acpi.c +++ b/drivers/platform/x86/thinkpad_acpi.c @@ -9203,17 +9203,58 @@ int tpacpi_led_set(int whichled, bool on) } EXPORT_SYMBOL_GPL(tpacpi_led_set); +static int tpacpi_led_mute_set(struct led_classdev *led_cdev, + enum led_brightness brightness) +{ + return tpacpi_led_set(TPACPI_LED_MUTE, brightness != LED_OFF); +} + +static int tpacpi_led_micmute_set(struct led_classdev *led_cdev, + enum led_brightness brightness) +{ + return tpacpi_led_set(TPACPI_LED_MICMUTE, brightness != LED_OFF); +} + +static struct led_classdev mute_led_cdev[] = { + [TPACPI_LED_MUTE] = { + .name = "platform::mute", + .max_brightness = 1, + .brightness_set_blocking = tpacpi_led_mute_set, + .default_trigger = "audio-mute", + }, + [TPACPI_LED_MICMUTE] = { + .name = "platform::micmute", + .max_brightness = 1, + .brightness_set_blocking = tpacpi_led_micmute_set, + .default_trigger = "audio-micmute", + }, +}; + static int mute_led_init(struct ibm_init_struct *iibm) { + static enum led_audio types[] = { + [TPACPI_LED_MUTE] = LED_AUDIO_MUTE, + [TPACPI_LED_MICMUTE] = LED_AUDIO_MICMUTE, + }; acpi_handle temp; - int i; + int i, err; for (i = 0; i < TPACPI_LED_MAX; i++) { struct tp_led_table *t = &led_tables[i]; - if (ACPI_SUCCESS(acpi_get_handle(hkey_handle, t->name, &temp))) - mute_led_on_off(t, false); - else + if (ACPI_FAILURE(acpi_get_handle(hkey_handle, t->name, &temp))) { t->state = -ENODEV; + continue; + } + + mute_led_cdev[i].brightness = ledtrig_audio_get(types[i]); + err = led_classdev_register(&tpacpi_pdev->dev, &mute_led_cdev[i]); + if (err < 0) { + while (i--) { + if (led_tables[i].state >= 0) + led_classdev_unregister(&mute_led_cdev[i]); + } + return err; + } } return 0; } @@ -9222,8 +9263,12 @@ static void mute_led_exit(void) { int i; - for (i = 0; i < TPACPI_LED_MAX; i++) - tpacpi_led_set(i, false); + for (i = 0; i < TPACPI_LED_MAX; i++) { + if (led_tables[i].state >= 0) { + led_classdev_unregister(&mute_led_cdev[i]); + tpacpi_led_set(i, false); + } + } } static void mute_led_resume(void) From b3802783d0b58989d70a484aa2d50fb8ce40841b Mon Sep 17 00:00:00 2001 From: Takashi Iwai <tiwai@suse.de> Date: Mon, 26 Nov 2018 17:47:46 +0100 Subject: [PATCH 441/443] ALSA: hda - Support led audio trigger MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Now all relevant platform drivers are providing the LED audio trigger, we can switch the mute LED control with the LED trigger, finally. For the mic-mute LED trigger, a common fixup function, snd_hda_gen_fixup_micmute_led(), is provided to be called for the corresponding quirk entries. This sets up the capture sync hook with ledtrig_audio_set() call appropriately. For the mute LED trigger, which is done currently only for thinkpad_acpi, the call is replaced with ledtrig_audio_set() as well. Overall, the beauty of the new implementation is that the whole ugly bindings with request_symbol() are dropped, and also that it provides more flexibility to users. One potential behavior change by this patch is that the mute LED enum may be created on machines that actually have no LED device. In the former code, we did test-call and abort binding if the test failed. But with the LED-trigger binding, this test isn't possible, and the actual check is done in the LED class device side. So it's the downside of simpleness. Also, note that the HD-audio codec driver doesn't select CONFIG_LEDS and co by itself. It's supposed to be selected by the platform drivers instead. Acked-by: Jacek Anaszewski <jacek.anaszewski@gmail.com> Acked-by: Pavel Machek <pavel@ucw.cz> Acked-by: Pali Rohár <pali.rohar@gmail.com> Signed-off-by: Takashi Iwai <tiwai@suse.de> --- sound/pci/hda/dell_wmi_helper.c | 48 --------------------------------- sound/pci/hda/hda_generic.c | 31 +++++++++++++++++++++ sound/pci/hda/hda_generic.h | 2 ++ sound/pci/hda/patch_realtek.c | 17 +++++------- sound/pci/hda/thinkpad_helper.c | 43 +++++------------------------ 5 files changed, 46 insertions(+), 95 deletions(-) delete mode 100644 sound/pci/hda/dell_wmi_helper.c diff --git a/sound/pci/hda/dell_wmi_helper.c b/sound/pci/hda/dell_wmi_helper.c deleted file mode 100644 index bbd6c87a4ed69..0000000000000 --- a/sound/pci/hda/dell_wmi_helper.c +++ /dev/null @@ -1,48 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* Helper functions for Dell Mic Mute LED control; - * to be included from codec driver - */ - -#if IS_ENABLED(CONFIG_DELL_LAPTOP) -#include <linux/dell-led.h> - -static int (*dell_micmute_led_set_func)(int); - -static void dell_micmute_update(struct hda_codec *codec) -{ - struct hda_gen_spec *spec = codec->spec; - - dell_micmute_led_set_func(spec->micmute_led.led_value); -} - -static void alc_fixup_dell_wmi(struct hda_codec *codec, - const struct hda_fixup *fix, int action) -{ - bool removefunc = false; - - if (action == HDA_FIXUP_ACT_PROBE) { - if (!dell_micmute_led_set_func) - dell_micmute_led_set_func = symbol_request(dell_micmute_led_set); - if (!dell_micmute_led_set_func) { - codec_warn(codec, "Failed to find dell wmi symbol dell_micmute_led_set\n"); - return; - } - - removefunc = (dell_micmute_led_set_func(false) < 0) || - (snd_hda_gen_add_micmute_led(codec, - dell_micmute_update) < 0); - } - - if (dell_micmute_led_set_func && (action == HDA_FIXUP_ACT_FREE || removefunc)) { - symbol_put(dell_micmute_led_set); - dell_micmute_led_set_func = NULL; - } -} - -#else /* CONFIG_DELL_LAPTOP */ -static void alc_fixup_dell_wmi(struct hda_codec *codec, - const struct hda_fixup *fix, int action) -{ -} - -#endif /* CONFIG_DELL_LAPTOP */ diff --git a/sound/pci/hda/hda_generic.c b/sound/pci/hda/hda_generic.c index 276150f29cda9..4095cd7c56c63 100644 --- a/sound/pci/hda/hda_generic.c +++ b/sound/pci/hda/hda_generic.c @@ -29,6 +29,7 @@ #include <linux/string.h> #include <linux/bitops.h> #include <linux/module.h> +#include <linux/leds.h> #include <sound/core.h> #include <sound/jack.h> #include <sound/tlv.h> @@ -4035,6 +4036,36 @@ int snd_hda_gen_add_micmute_led(struct hda_codec *codec, } EXPORT_SYMBOL_GPL(snd_hda_gen_add_micmute_led); +#if IS_REACHABLE(CONFIG_LEDS_TRIGGER_AUDIO) +static void call_ledtrig_micmute(struct hda_codec *codec) +{ + struct hda_gen_spec *spec = codec->spec; + + ledtrig_audio_set(LED_AUDIO_MICMUTE, + spec->micmute_led.led_value ? LED_ON : LED_OFF); +} +#endif + +/** + * snd_hda_gen_fixup_micmute_led - A fixup for mic-mute LED trigger + * + * Pass this function to the quirk entry if another driver supports the + * audio mic-mute LED trigger. Then this will bind the mixer capture switch + * change with the LED. + * + * Note that this fixup has to be called after other fixup that sets + * cap_sync_hook. Otherwise the chaining wouldn't work. + */ +void snd_hda_gen_fixup_micmute_led(struct hda_codec *codec, + const struct hda_fixup *fix, int action) +{ +#if IS_REACHABLE(CONFIG_LEDS_TRIGGER_AUDIO) + if (action == HDA_FIXUP_ACT_PROBE) + snd_hda_gen_add_micmute_led(codec, call_ledtrig_micmute); +#endif +} +EXPORT_SYMBOL_GPL(snd_hda_gen_fixup_micmute_led); + /* * parse digital I/Os and set up NIDs in BIOS auto-parse mode */ diff --git a/sound/pci/hda/hda_generic.h b/sound/pci/hda/hda_generic.h index 10123664fa619..78d77042b05aa 100644 --- a/sound/pci/hda/hda_generic.h +++ b/sound/pci/hda/hda_generic.h @@ -357,5 +357,7 @@ int snd_hda_gen_fix_pin_power(struct hda_codec *codec, hda_nid_t pin); int snd_hda_gen_add_micmute_led(struct hda_codec *codec, void (*hook)(struct hda_codec *)); +void snd_hda_gen_fixup_micmute_led(struct hda_codec *codec, + const struct hda_fixup *fix, int action); #endif /* __SOUND_HDA_GENERIC_H */ diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index fa61674a56050..993d34c141c28 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -5368,9 +5368,6 @@ static void alc_fixup_thinkpad_acpi(struct hda_codec *codec, hda_fixup_thinkpad_acpi(codec, fix, action); } -/* for dell wmi mic mute led */ -#include "dell_wmi_helper.c" - /* for alc295_fixup_hp_top_speakers */ #include "hp_x360_helper.c" @@ -5448,7 +5445,7 @@ enum { ALC292_FIXUP_TPT440_DOCK, ALC292_FIXUP_TPT440, ALC283_FIXUP_HEADSET_MIC, - ALC255_FIXUP_DELL_WMI_MIC_MUTE_LED, + ALC255_FIXUP_MIC_MUTE_LED, ALC282_FIXUP_ASPIRE_V5_PINS, ALC280_FIXUP_HP_GPIO4, ALC286_FIXUP_HP_GPIO_LED, @@ -5740,7 +5737,7 @@ static const struct hda_fixup alc269_fixups[] = { .type = HDA_FIXUP_FUNC, .v.func = alc_fixup_headset_mode, .chained = true, - .chain_id = ALC255_FIXUP_DELL_WMI_MIC_MUTE_LED + .chain_id = ALC255_FIXUP_MIC_MUTE_LED }, [ALC269_FIXUP_HEADSET_MODE_NO_HP_MIC] = { .type = HDA_FIXUP_FUNC, @@ -5966,7 +5963,7 @@ static const struct hda_fixup alc269_fixups[] = { .type = HDA_FIXUP_FUNC, .v.func = alc_fixup_headset_mode_alc255, .chained = true, - .chain_id = ALC255_FIXUP_DELL_WMI_MIC_MUTE_LED + .chain_id = ALC255_FIXUP_MIC_MUTE_LED }, [ALC255_FIXUP_HEADSET_MODE_NO_HP_MIC] = { .type = HDA_FIXUP_FUNC, @@ -6001,9 +5998,9 @@ static const struct hda_fixup alc269_fixups[] = { { }, }, }, - [ALC255_FIXUP_DELL_WMI_MIC_MUTE_LED] = { + [ALC255_FIXUP_MIC_MUTE_LED] = { .type = HDA_FIXUP_FUNC, - .v.func = alc_fixup_dell_wmi, + .v.func = snd_hda_gen_fixup_micmute_led, }, [ALC282_FIXUP_ASPIRE_V5_PINS] = { .type = HDA_FIXUP_PINS, @@ -6062,7 +6059,7 @@ static const struct hda_fixup alc269_fixups[] = { .type = HDA_FIXUP_FUNC, .v.func = alc_fixup_headset_mode_dell_alc288, .chained = true, - .chain_id = ALC255_FIXUP_DELL_WMI_MIC_MUTE_LED + .chain_id = ALC255_FIXUP_MIC_MUTE_LED }, [ALC288_FIXUP_DELL1_MIC_NO_PRESENCE] = { .type = HDA_FIXUP_PINS, @@ -6719,7 +6716,7 @@ static const struct hda_model_fixup alc269_fixup_models[] = { {.id = ALC255_FIXUP_DELL2_MIC_NO_PRESENCE, .name = "alc255-dell2"}, {.id = ALC293_FIXUP_DELL1_MIC_NO_PRESENCE, .name = "alc293-dell1"}, {.id = ALC283_FIXUP_HEADSET_MIC, .name = "alc283-headset"}, - {.id = ALC255_FIXUP_DELL_WMI_MIC_MUTE_LED, .name = "alc255-dell-mute"}, + {.id = ALC255_FIXUP_MIC_MUTE_LED, .name = "alc255-dell-mute"}, {.id = ALC282_FIXUP_ASPIRE_V5_PINS, .name = "aspire-v5"}, {.id = ALC280_FIXUP_HP_GPIO4, .name = "hp-gpio4"}, {.id = ALC286_FIXUP_HP_GPIO_LED, .name = "hp-gpio-led"}, diff --git a/sound/pci/hda/thinkpad_helper.c b/sound/pci/hda/thinkpad_helper.c index 568575b72f2f7..4089feb8c68e3 100644 --- a/sound/pci/hda/thinkpad_helper.c +++ b/sound/pci/hda/thinkpad_helper.c @@ -3,12 +3,11 @@ * to be included from codec driver */ -#if IS_ENABLED(CONFIG_THINKPAD_ACPI) +#if IS_ENABLED(CONFIG_THINKPAD_ACPI) && IS_REACHABLE(CONFIG_LEDS_TRIGGER_AUDIO) #include <linux/acpi.h> -#include <linux/thinkpad_acpi.h> +#include <linux/leds.h> -static int (*led_set_func)(int, bool); static void (*old_vmaster_hook)(void *, int); static bool is_thinkpad(struct hda_codec *codec) @@ -23,50 +22,20 @@ static void update_tpacpi_mute_led(void *private_data, int enabled) if (old_vmaster_hook) old_vmaster_hook(private_data, enabled); - if (led_set_func) - led_set_func(TPACPI_LED_MUTE, !enabled); -} - -static void update_tpacpi_micmute(struct hda_codec *codec) -{ - struct hda_gen_spec *spec = codec->spec; - - led_set_func(TPACPI_LED_MICMUTE, spec->micmute_led.led_value); + ledtrig_audio_set(LED_AUDIO_MUTE, enabled ? LED_OFF : LED_ON); } static void hda_fixup_thinkpad_acpi(struct hda_codec *codec, const struct hda_fixup *fix, int action) { struct hda_gen_spec *spec = codec->spec; - bool removefunc = false; if (action == HDA_FIXUP_ACT_PROBE) { if (!is_thinkpad(codec)) return; - if (!led_set_func) - led_set_func = symbol_request(tpacpi_led_set); - if (!led_set_func) { - codec_warn(codec, - "Failed to find thinkpad-acpi symbol tpacpi_led_set\n"); - return; - } - - removefunc = true; - if (led_set_func(TPACPI_LED_MUTE, false) >= 0) { - old_vmaster_hook = spec->vmaster_mute.hook; - spec->vmaster_mute.hook = update_tpacpi_mute_led; - removefunc = false; - } - if (led_set_func(TPACPI_LED_MICMUTE, false) >= 0 && - !snd_hda_gen_add_micmute_led(codec, - update_tpacpi_micmute)) - removefunc = false; - } - - if (led_set_func && (action == HDA_FIXUP_ACT_FREE || removefunc)) { - symbol_put(tpacpi_led_set); - led_set_func = NULL; - old_vmaster_hook = NULL; + old_vmaster_hook = spec->vmaster_mute.hook; + spec->vmaster_mute.hook = update_tpacpi_mute_led; + snd_hda_gen_fixup_micmute_led(codec, fix, action); } } From f783e128a6f1484d72ceab06d483ea32df0ce333 Mon Sep 17 00:00:00 2001 From: Takashi Iwai <tiwai@suse.de> Date: Mon, 26 Nov 2018 17:47:46 +0100 Subject: [PATCH 442/443] platform/x86: dell-laptop: Drop superfluous exported function MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Since we've switched to the LED trigger for binding with HD-audio, we can drop the exported function as well as the whole linux/dell-led.h. Acked-by: Jacek Anaszewski <jacek.anaszewski@gmail.com> Acked-by: Pavel Machek <pavel@ucw.cz> Acked-by: Andy Shevchenko <andy.shevchenko@gmail.com> Acked-by: Pali Rohár <pali.rohar@gmail.com> Signed-off-by: Takashi Iwai <tiwai@suse.de> --- drivers/platform/x86/dell-laptop.c | 22 +++++----------------- include/linux/dell-led.h | 7 ------- 2 files changed, 5 insertions(+), 24 deletions(-) delete mode 100644 include/linux/dell-led.h diff --git a/drivers/platform/x86/dell-laptop.c b/drivers/platform/x86/dell-laptop.c index 0db2dbf7b0d1c..fb071e6a50582 100644 --- a/drivers/platform/x86/dell-laptop.c +++ b/drivers/platform/x86/dell-laptop.c @@ -29,7 +29,6 @@ #include <linux/mm.h> #include <linux/i8042.h> #include <linux/debugfs.h> -#include <linux/dell-led.h> #include <linux/seq_file.h> #include <acpi/video.h> #include "dell-rbtn.h" @@ -2109,17 +2108,17 @@ static struct notifier_block dell_laptop_notifier = { .notifier_call = dell_laptop_notifier_call, }; -int dell_micmute_led_set(int state) +static int micmute_led_set(struct led_classdev *led_cdev, + enum led_brightness brightness) { struct calling_interface_buffer buffer; struct calling_interface_token *token; + int state = brightness != LED_OFF; if (state == 0) token = dell_smbios_find_token(GLOBAL_MIC_MUTE_DISABLE); - else if (state == 1) - token = dell_smbios_find_token(GLOBAL_MIC_MUTE_ENABLE); else - return -EINVAL; + token = dell_smbios_find_token(GLOBAL_MIC_MUTE_ENABLE); if (!token) return -ENODEV; @@ -2127,18 +2126,7 @@ int dell_micmute_led_set(int state) dell_fill_request(&buffer, token->location, token->value, 0, 0); dell_send_request(&buffer, CLASS_TOKEN_WRITE, SELECT_TOKEN_STD); - return state; -} -EXPORT_SYMBOL_GPL(dell_micmute_led_set); - -static int micmute_led_set(struct led_classdev *led_cdev, - enum led_brightness brightness) -{ - int state = brightness != LED_OFF; - int err; - - err = dell_micmute_led_set(state); - return err < 0 ? err : 0; + return 0; } static struct led_classdev micmute_led_cdev = { diff --git a/include/linux/dell-led.h b/include/linux/dell-led.h deleted file mode 100644 index 92521471517fe..0000000000000 --- a/include/linux/dell-led.h +++ /dev/null @@ -1,7 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef __DELL_LED_H__ -#define __DELL_LED_H__ - -int dell_micmute_led_set(int on); - -#endif From 9e908a180e6a90fa102d5d3f96ca86825f43e4fb Mon Sep 17 00:00:00 2001 From: Takashi Iwai <tiwai@suse.de> Date: Mon, 26 Nov 2018 17:47:47 +0100 Subject: [PATCH 443/443] platform/x86: thinkpad_acpi: Drop superfluous exported function MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Since we've switched to the LED trigger for binding with HD-audio, we can drop the exported function as well as the whole linux/thinkpad_acpi.h. The own TPACPI_LED_MUTE and TPACPI_LED_MICMUTE definitions are replaced with the identical ones for LEDS, i.e. LED_AUDIO_MUTE and LED_AUDIO_MICMUTE, respectively. They are no longer needed as referred only locally. Acked-by: Jacek Anaszewski <jacek.anaszewski@gmail.com> Acked-by: Pavel Machek <pavel@ucw.cz> Acked-by: Andy Shevchenko <andy.shevchenko@gmail.com> Acked-by: Henrique de Moraes Holschuh <hmh@hmh.eng.br> Acked-by: Pali Rohár <pali.rohar@gmail.com> Signed-off-by: Takashi Iwai <tiwai@suse.de> --- drivers/platform/x86/thinkpad_acpi.c | 30 ++++++++++------------------ include/linux/thinkpad_acpi.h | 16 --------------- 2 files changed, 11 insertions(+), 35 deletions(-) delete mode 100644 include/linux/thinkpad_acpi.h diff --git a/drivers/platform/x86/thinkpad_acpi.c b/drivers/platform/x86/thinkpad_acpi.c index 3d2c1f5f22e20..21ffb961585ae 100644 --- a/drivers/platform/x86/thinkpad_acpi.c +++ b/drivers/platform/x86/thinkpad_acpi.c @@ -81,7 +81,6 @@ #include <linux/acpi.h> #include <linux/pci_ids.h> #include <linux/power_supply.h> -#include <linux/thinkpad_acpi.h> #include <sound/core.h> #include <sound/control.h> #include <sound/initval.h> @@ -9150,6 +9149,7 @@ static struct ibm_struct fan_driver_data = { * Mute LED subdriver */ +#define TPACPI_LED_MAX 2 struct tp_led_table { acpi_string name; @@ -9158,13 +9158,13 @@ struct tp_led_table { int state; }; -static struct tp_led_table led_tables[] = { - [TPACPI_LED_MUTE] = { +static struct tp_led_table led_tables[TPACPI_LED_MAX] = { + [LED_AUDIO_MUTE] = { .name = "SSMS", .on_value = 1, .off_value = 0, }, - [TPACPI_LED_MICMUTE] = { + [LED_AUDIO_MICMUTE] = { .name = "MMTS", .on_value = 2, .off_value = 0, @@ -9189,40 +9189,36 @@ static int mute_led_on_off(struct tp_led_table *t, bool state) return state; } -int tpacpi_led_set(int whichled, bool on) +static int tpacpi_led_set(int whichled, bool on) { struct tp_led_table *t; - if (whichled < 0 || whichled >= TPACPI_LED_MAX) - return -EINVAL; - t = &led_tables[whichled]; if (t->state < 0 || t->state == on) return t->state; return mute_led_on_off(t, on); } -EXPORT_SYMBOL_GPL(tpacpi_led_set); static int tpacpi_led_mute_set(struct led_classdev *led_cdev, enum led_brightness brightness) { - return tpacpi_led_set(TPACPI_LED_MUTE, brightness != LED_OFF); + return tpacpi_led_set(LED_AUDIO_MUTE, brightness != LED_OFF); } static int tpacpi_led_micmute_set(struct led_classdev *led_cdev, enum led_brightness brightness) { - return tpacpi_led_set(TPACPI_LED_MICMUTE, brightness != LED_OFF); + return tpacpi_led_set(LED_AUDIO_MICMUTE, brightness != LED_OFF); } -static struct led_classdev mute_led_cdev[] = { - [TPACPI_LED_MUTE] = { +static struct led_classdev mute_led_cdev[TPACPI_LED_MAX] = { + [LED_AUDIO_MUTE] = { .name = "platform::mute", .max_brightness = 1, .brightness_set_blocking = tpacpi_led_mute_set, .default_trigger = "audio-mute", }, - [TPACPI_LED_MICMUTE] = { + [LED_AUDIO_MICMUTE] = { .name = "platform::micmute", .max_brightness = 1, .brightness_set_blocking = tpacpi_led_micmute_set, @@ -9232,10 +9228,6 @@ static struct led_classdev mute_led_cdev[] = { static int mute_led_init(struct ibm_init_struct *iibm) { - static enum led_audio types[] = { - [TPACPI_LED_MUTE] = LED_AUDIO_MUTE, - [TPACPI_LED_MICMUTE] = LED_AUDIO_MICMUTE, - }; acpi_handle temp; int i, err; @@ -9246,7 +9238,7 @@ static int mute_led_init(struct ibm_init_struct *iibm) continue; } - mute_led_cdev[i].brightness = ledtrig_audio_get(types[i]); + mute_led_cdev[i].brightness = ledtrig_audio_get(i); err = led_classdev_register(&tpacpi_pdev->dev, &mute_led_cdev[i]); if (err < 0) { while (i--) { diff --git a/include/linux/thinkpad_acpi.h b/include/linux/thinkpad_acpi.h deleted file mode 100644 index 9fb317970c012..0000000000000 --- a/include/linux/thinkpad_acpi.h +++ /dev/null @@ -1,16 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef __THINKPAD_ACPI_H__ -#define __THINKPAD_ACPI_H__ - -/* These two functions return 0 if success, or negative error code - (e g -ENODEV if no led present) */ - -enum { - TPACPI_LED_MUTE, - TPACPI_LED_MICMUTE, - TPACPI_LED_MAX, -}; - -int tpacpi_led_set(int whichled, bool on); - -#endif