From f5a9f0ca40c74547e28e0cb30973df5577dfbaec Mon Sep 17 00:00:00 2001 From: Michal Kazior Date: Fri, 30 May 2014 23:49:57 +0300 Subject: [PATCH 001/268] ath10k: remove unnecessary htt rx corruption check While fixing a bug reported by Avery I went ahead and added a warning suspecting there might be something more to the bug. This ended up with people reporting they see warnings during heavy traffic. This bought me some time and helped me understand the problem better - apparently fw/hw can report a chained msdus as follows: 1 msdu, 1 chained, 1 msdu (0 length). The patch removes the extra check but leaves the other change that fixed the original skb_push panic bug (msdu_chaining was overwritten in an unfortunate way which made the above example to be treated as non-chained case). Reported-by: Yeoh Chun-Yeow Reported-by: Tim Harvey Signed-off-by: Michal Kazior Signed-off-by: Kalle Valo --- drivers/net/wireless/ath/ath10k/htt_rx.c | 18 ------------------ 1 file changed, 18 deletions(-) diff --git a/drivers/net/wireless/ath/ath10k/htt_rx.c b/drivers/net/wireless/ath/ath10k/htt_rx.c index 6c102b1312ff9..eebc860c36550 100644 --- a/drivers/net/wireless/ath/ath10k/htt_rx.c +++ b/drivers/net/wireless/ath/ath10k/htt_rx.c @@ -312,7 +312,6 @@ static int ath10k_htt_rx_amsdu_pop(struct ath10k_htt *htt, int msdu_len, msdu_chaining = 0; struct sk_buff *msdu; struct htt_rx_desc *rx_desc; - bool corrupted = false; lockdep_assert_held(&htt->rx_ring.lock); @@ -439,9 +438,6 @@ static int ath10k_htt_rx_amsdu_pop(struct ath10k_htt *htt, last_msdu = __le32_to_cpu(rx_desc->msdu_end.info0) & RX_MSDU_END_INFO0_LAST_MSDU; - if (msdu_chaining && !last_msdu) - corrupted = true; - if (last_msdu) { msdu->next = NULL; break; @@ -456,20 +452,6 @@ static int ath10k_htt_rx_amsdu_pop(struct ath10k_htt *htt, if (*head_msdu == NULL) msdu_chaining = -1; - /* - * Apparently FW sometimes reports weird chained MSDU sequences with - * more than one rx descriptor. This seems like a bug but needs more - * analyzing. For the time being fix it by dropping such sequences to - * avoid blowing up the host system. - */ - if (corrupted) { - ath10k_warn("failed to pop chained msdus, dropping\n"); - ath10k_htt_rx_free_msdu_chain(*head_msdu); - *head_msdu = NULL; - *tail_msdu = NULL; - msdu_chaining = -EINVAL; - } - /* * Don't refill the ring yet. * From dfa413de1e4388818f7dcdce0a90d6212e74895b Mon Sep 17 00:00:00 2001 From: Bartosz Markowski Date: Mon, 2 Jun 2014 21:19:45 +0300 Subject: [PATCH 002/268] ath10k: fix 8th virtual AP interface with DFS Firmware 10.x supports up to 8 virtual AP interfaces, but in a DFS channel it was possible to create only 7 interfaces as ath10k internal creates a monitor interface for DFS. Previous vdev map initialization was missing enough space for 8 + 1 vdevs due to wrong define used and that's why there was no space for 8th interface. Use the correct define TARGET_10X_NUM_VDEVS with 10.x firmware to make it possible to create the 8th virtual interface. Signed-off-by: Bartosz Markowski Signed-off-by: Kalle Valo --- drivers/net/wireless/ath/ath10k/core.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/net/wireless/ath/ath10k/core.c b/drivers/net/wireless/ath/ath10k/core.c index 82017f56e6613..e6c56c5bb0f60 100644 --- a/drivers/net/wireless/ath/ath10k/core.c +++ b/drivers/net/wireless/ath/ath10k/core.c @@ -795,7 +795,11 @@ int ath10k_core_start(struct ath10k *ar) if (status) goto err_htc_stop; - ar->free_vdev_map = (1 << TARGET_NUM_VDEVS) - 1; + if (test_bit(ATH10K_FW_FEATURE_WMI_10X, ar->fw_features)) + ar->free_vdev_map = (1 << TARGET_10X_NUM_VDEVS) - 1; + else + ar->free_vdev_map = (1 << TARGET_NUM_VDEVS) - 1; + INIT_LIST_HEAD(&ar->arvifs); if (!test_bit(ATH10K_FLAG_FIRST_BOOT_DONE, &ar->dev_flags)) From 4856fbd12d69965d3ab680c686222db93872728d Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Wed, 11 Jun 2014 11:49:31 -0300 Subject: [PATCH 003/268] [media] staging: tighten omap4iss dependencies The OMAP4 camera support depends on I2C and VIDEO_V4L2, both of which can be loadable modules. This causes build failures if we want the camera driver to be built-in. This can be solved by turning the option into "tristate", which unfortunately causes another problem, because the driver incorrectly calls a platform-internal interface for omap4_ctrl_pad_readl/omap4_ctrl_pad_writel. Instead, this patch just forbids the invalid configurations and ensures that the driver can only be built if all its dependencies are built-in. Cc: stable@vger.kernel.org Signed-off-by: Arnd Bergmann Signed-off-by: Laurent Pinchart Signed-off-by: Mauro Carvalho Chehab --- drivers/staging/media/omap4iss/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/staging/media/omap4iss/Kconfig b/drivers/staging/media/omap4iss/Kconfig index 78b0fba7047e9..8afc6fee40c54 100644 --- a/drivers/staging/media/omap4iss/Kconfig +++ b/drivers/staging/media/omap4iss/Kconfig @@ -1,6 +1,6 @@ config VIDEO_OMAP4 bool "OMAP 4 Camera support" - depends on VIDEO_V4L2 && VIDEO_V4L2_SUBDEV_API && I2C && ARCH_OMAP4 + depends on VIDEO_V4L2=y && VIDEO_V4L2_SUBDEV_API && I2C=y && ARCH_OMAP4 select VIDEOBUF2_DMA_CONTIG ---help--- Driver for an OMAP 4 ISS controller. From eefae30a1b3aabab6085be2ca0e314021253daa2 Mon Sep 17 00:00:00 2001 From: Antti Palosaari Date: Fri, 13 Jun 2014 11:08:25 -0300 Subject: [PATCH 004/268] [media] si2168: add one missing parenthesis Fix following warnings: si2168_cmd_execute() warn: add some parenthesis here? si2168_cmd_execute() warn: maybe use && instead of & Reported-by: Dan Carpenter Signed-off-by: Antti Palosaari Signed-off-by: Mauro Carvalho Chehab --- drivers/media/dvb-frontends/si2168.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/media/dvb-frontends/si2168.c b/drivers/media/dvb-frontends/si2168.c index 8637d2ed76238..f20573649043f 100644 --- a/drivers/media/dvb-frontends/si2168.c +++ b/drivers/media/dvb-frontends/si2168.c @@ -60,7 +60,7 @@ static int si2168_cmd_execute(struct si2168 *s, struct si2168_cmd *cmd) jiffies_to_msecs(jiffies) - (jiffies_to_msecs(timeout) - TIMEOUT)); - if (!(cmd->args[0] >> 7) & 0x01) { + if (!((cmd->args[0] >> 7) & 0x01)) { ret = -ETIMEDOUT; goto err_mutex_unlock; } From a811e6ec87d910faceda561fae9b0088d70ee831 Mon Sep 17 00:00:00 2001 From: Antti Palosaari Date: Fri, 13 Jun 2014 11:19:07 -0300 Subject: [PATCH 005/268] [media] si2157: add one missing parenthesis Fix following warnings: si2157_cmd_execute() warn: add some parenthesis here? si2157_cmd_execute() warn: maybe use && instead of & Reported-by: Dan Carpenter Signed-off-by: Antti Palosaari Signed-off-by: Mauro Carvalho Chehab --- drivers/media/tuners/si2157.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/media/tuners/si2157.c b/drivers/media/tuners/si2157.c index 271a752cee541..fa4cc7b880aa4 100644 --- a/drivers/media/tuners/si2157.c +++ b/drivers/media/tuners/si2157.c @@ -57,7 +57,7 @@ static int si2157_cmd_execute(struct si2157 *s, struct si2157_cmd *cmd) jiffies_to_msecs(jiffies) - (jiffies_to_msecs(timeout) - TIMEOUT)); - if (!(buf[0] >> 7) & 0x01) { + if (!((buf[0] >> 7) & 0x01)) { ret = -ETIMEDOUT; goto err_mutex_unlock; } else { From 0c76e68d6ec6ade4dd0ae15fb08a827525fec3a2 Mon Sep 17 00:00:00 2001 From: Antti Palosaari Date: Fri, 13 Jun 2014 19:29:55 -0300 Subject: [PATCH 006/268] [media] si2168: firmware download fix First 8 bytes belonging to firmware image were hard-coded and uploaded by the driver mistakenly. Introduce new corrected firmware file and remove those 8 bytes from the driver. New firmware image could be extracted from the PCTV 292e driver CD using following command: $ dd if=/TVC 6.4.8/Driver/PCTV Empia/emOEM.sys ibs=1 skip=1089408 count=2728 of=dvb-demod-si2168-02.fw $ md5sum dvb-demod-si2168-02.fw d8da7ff67cd56cd8aa4e101aea45e052 dvb-demod-si2168-02.fw $ sudo cp dvb-demod-si2168-02.fw /lib/firmware/ Signed-off-by: Antti Palosaari Signed-off-by: Mauro Carvalho Chehab --- drivers/media/dvb-frontends/si2168.c | 14 -------------- drivers/media/dvb-frontends/si2168_priv.h | 2 +- 2 files changed, 1 insertion(+), 15 deletions(-) diff --git a/drivers/media/dvb-frontends/si2168.c b/drivers/media/dvb-frontends/si2168.c index f20573649043f..2e3cdcfa0a675 100644 --- a/drivers/media/dvb-frontends/si2168.c +++ b/drivers/media/dvb-frontends/si2168.c @@ -485,20 +485,6 @@ static int si2168_init(struct dvb_frontend *fe) if (ret) goto err; - cmd.args[0] = 0x05; - cmd.args[1] = 0x00; - cmd.args[2] = 0xaa; - cmd.args[3] = 0x4d; - cmd.args[4] = 0x56; - cmd.args[5] = 0x40; - cmd.args[6] = 0x00; - cmd.args[7] = 0x00; - cmd.wlen = 8; - cmd.rlen = 1; - ret = si2168_cmd_execute(s, &cmd); - if (ret) - goto err; - /* cold state - try to download firmware */ dev_info(&s->client->dev, "%s: found a '%s' in cold state\n", KBUILD_MODNAME, si2168_ops.info.name); diff --git a/drivers/media/dvb-frontends/si2168_priv.h b/drivers/media/dvb-frontends/si2168_priv.h index 2a343e896f403..53f7f06ae3437 100644 --- a/drivers/media/dvb-frontends/si2168_priv.h +++ b/drivers/media/dvb-frontends/si2168_priv.h @@ -22,7 +22,7 @@ #include #include -#define SI2168_FIRMWARE "dvb-demod-si2168-01.fw" +#define SI2168_FIRMWARE "dvb-demod-si2168-02.fw" /* state struct */ struct si2168 { From f71920efb1066d71d74811e1dbed658173adf9bf Mon Sep 17 00:00:00 2001 From: Rickard Strandqvist Date: Sat, 14 Jun 2014 08:37:09 -0300 Subject: [PATCH 007/268] [media] media: v4l2-core: v4l2-dv-timings.c: Cleaning up code wrong value used in aspect ratio Wrong value used in same cases for the aspect ratio. Signed-off-by: Rickard Strandqvist Acked-by: Lad, Prabhakar Cc: stable@vger.kernel.org # for v3.12 and up Signed-off-by: Hans Verkuil Signed-off-by: Mauro Carvalho Chehab --- drivers/media/v4l2-core/v4l2-dv-timings.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/media/v4l2-core/v4l2-dv-timings.c b/drivers/media/v4l2-core/v4l2-dv-timings.c index 4ae54caadd037..ce1c9f5d9dee1 100644 --- a/drivers/media/v4l2-core/v4l2-dv-timings.c +++ b/drivers/media/v4l2-core/v4l2-dv-timings.c @@ -610,10 +610,10 @@ struct v4l2_fract v4l2_calc_aspect_ratio(u8 hor_landscape, u8 vert_portrait) aspect.denominator = 9; } else if (ratio == 34) { aspect.numerator = 4; - aspect.numerator = 3; + aspect.denominator = 3; } else if (ratio == 68) { aspect.numerator = 15; - aspect.numerator = 9; + aspect.denominator = 9; } else { aspect.numerator = hor_landscape + 99; aspect.denominator = 100; From 13936af3d2f04f173a83cc050dbc4b20d8562b81 Mon Sep 17 00:00:00 2001 From: Hans Verkuil Date: Mon, 16 Jun 2014 04:32:37 -0300 Subject: [PATCH 008/268] [media] saa7134: use unlocked_ioctl instead of ioctl The saa7134 driver uses core-locking, so there is no longer any need to use the ioctl op instead of the unlocked_ioctl op. This change was forgotten for the saa7134-empress.c, so fix this. Signed-off-by: Hans Verkuil Signed-off-by: Mauro Carvalho Chehab --- drivers/media/pci/saa7134/saa7134-empress.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/media/pci/saa7134/saa7134-empress.c b/drivers/media/pci/saa7134/saa7134-empress.c index e65c760e4e8bb..0006d6bf8c18a 100644 --- a/drivers/media/pci/saa7134/saa7134-empress.c +++ b/drivers/media/pci/saa7134/saa7134-empress.c @@ -179,7 +179,7 @@ static const struct v4l2_file_operations ts_fops = .read = vb2_fop_read, .poll = vb2_fop_poll, .mmap = vb2_fop_mmap, - .ioctl = video_ioctl2, + .unlocked_ioctl = video_ioctl2, }; static const struct v4l2_ioctl_ops ts_ioctl_ops = { From d755330c5e0658d8056242b5b81e2f44ed7a96d8 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Sun, 15 Jun 2014 10:22:15 +0200 Subject: [PATCH 009/268] perf tools: Fix segfault in cumulative.callchain report When cumulative callchain mode is on, we could get samples with with no actual hits. This breaks the assumption of the annotation code, that each sample has annotation counts allocated and leads to segfault. Fixing this by additional checks for annotation stats. Acked-by: Namhyung Kim Acked-by: Arnaldo Carvalho de Melo Cc: Arnaldo Carvalho de Melo Cc: Corey Ashford Cc: David Ahern Cc: Frederic Weisbecker Cc: Ingo Molnar Cc: Namhyung Kim Cc: Paul Mackerras Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1402821332-12419-1-git-send-email-jolsa@kernel.org Signed-off-by: Jiri Olsa --- tools/perf/ui/browsers/hists.c | 21 ++++++++++++++++----- 1 file changed, 16 insertions(+), 5 deletions(-) diff --git a/tools/perf/ui/browsers/hists.c b/tools/perf/ui/browsers/hists.c index 52c03fbbba177..04a229aa5c0fd 100644 --- a/tools/perf/ui/browsers/hists.c +++ b/tools/perf/ui/browsers/hists.c @@ -17,6 +17,7 @@ #include "../util.h" #include "../ui.h" #include "map.h" +#include "annotate.h" struct hist_browser { struct ui_browser b; @@ -1593,13 +1594,18 @@ static int perf_evsel__hists_browse(struct perf_evsel *evsel, int nr_events, bi->to.sym->name) > 0) annotate_t = nr_options++; } else { - if (browser->selection != NULL && browser->selection->sym != NULL && - !browser->selection->map->dso->annotate_warned && - asprintf(&options[nr_options], "Annotate %s", - browser->selection->sym->name) > 0) - annotate = nr_options++; + !browser->selection->map->dso->annotate_warned) { + struct annotation *notes; + + notes = symbol__annotation(browser->selection->sym); + + if (notes->src && + asprintf(&options[nr_options], "Annotate %s", + browser->selection->sym->name) > 0) + annotate = nr_options++; + } } if (thread != NULL && @@ -1656,6 +1662,7 @@ static int perf_evsel__hists_browse(struct perf_evsel *evsel, int nr_events, if (choice == annotate || choice == annotate_t || choice == annotate_f) { struct hist_entry *he; + struct annotation *notes; int err; do_annotate: if (!objdump_path && perf_session_env__lookup_objdump(env)) @@ -1679,6 +1686,10 @@ static int perf_evsel__hists_browse(struct perf_evsel *evsel, int nr_events, he->ms.map = he->branch_info->to.map; } + notes = symbol__annotation(he->ms.sym); + if (!notes->src) + continue; + /* * Don't let this be freed, say, by hists__decay_entry. */ From a93f0e551af9e194db38bfe16001e17a3a1d189a Mon Sep 17 00:00:00 2001 From: Simon Que Date: Mon, 16 Jun 2014 11:32:09 -0700 Subject: [PATCH 010/268] perf symbols: Get kernel start address by symbol name The function machine__get_kernel_start_addr() was taking the first symbol of kallsyms as the start address. This is incorrect in certain cases where the first symbol is something at 0, while the actual kernel functions begin at a later point (e.g. 0x80200000). This patch fixes machine__get_kernel_start_addr() to search for the symbol "_text" or "_stext", which marks the beginning of kernel mapping. This was already being done in machine__create_kernel_maps(). Thus, this patch is just a refactor, to move that code into machine__get_kernel_start_addr(). Signed-off-by: Simon Que Link: http://lkml.kernel.org/r/1402943529-13244-1-git-send-email-sque@chromium.org Signed-off-by: Jiri Olsa --- tools/perf/util/machine.c | 54 ++++++++++++++++----------------------- 1 file changed, 22 insertions(+), 32 deletions(-) diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c index 0e5fea95d5967..c73e1fc12e53e 100644 --- a/tools/perf/util/machine.c +++ b/tools/perf/util/machine.c @@ -496,18 +496,6 @@ struct process_args { u64 start; }; -static int symbol__in_kernel(void *arg, const char *name, - char type __maybe_unused, u64 start) -{ - struct process_args *args = arg; - - if (strchr(name, '[')) - return 0; - - args->start = start; - return 1; -} - static void machine__get_kallsyms_filename(struct machine *machine, char *buf, size_t bufsz) { @@ -517,27 +505,41 @@ static void machine__get_kallsyms_filename(struct machine *machine, char *buf, scnprintf(buf, bufsz, "%s/proc/kallsyms", machine->root_dir); } -/* Figure out the start address of kernel map from /proc/kallsyms */ -static u64 machine__get_kernel_start_addr(struct machine *machine) +const char *ref_reloc_sym_names[] = {"_text", "_stext", NULL}; + +/* Figure out the start address of kernel map from /proc/kallsyms. + * Returns the name of the start symbol in *symbol_name. Pass in NULL as + * symbol_name if it's not that important. + */ +static u64 machine__get_kernel_start_addr(struct machine *machine, + const char **symbol_name) { char filename[PATH_MAX]; - struct process_args args; + int i; + const char *name; + u64 addr = 0; machine__get_kallsyms_filename(machine, filename, PATH_MAX); if (symbol__restricted_filename(filename, "/proc/kallsyms")) return 0; - if (kallsyms__parse(filename, &args, symbol__in_kernel) <= 0) - return 0; + for (i = 0; (name = ref_reloc_sym_names[i]) != NULL; i++) { + addr = kallsyms__get_function_start(filename, name); + if (addr) + break; + } + + if (symbol_name) + *symbol_name = name; - return args.start; + return addr; } int __machine__create_kernel_maps(struct machine *machine, struct dso *kernel) { enum map_type type; - u64 start = machine__get_kernel_start_addr(machine); + u64 start = machine__get_kernel_start_addr(machine, NULL); for (type = 0; type < MAP__NR_TYPES; ++type) { struct kmap *kmap; @@ -852,23 +854,11 @@ static int machine__create_modules(struct machine *machine) return 0; } -const char *ref_reloc_sym_names[] = {"_text", "_stext", NULL}; - int machine__create_kernel_maps(struct machine *machine) { struct dso *kernel = machine__get_kernel(machine); - char filename[PATH_MAX]; const char *name; - u64 addr = 0; - int i; - - machine__get_kallsyms_filename(machine, filename, PATH_MAX); - - for (i = 0; (name = ref_reloc_sym_names[i]) != NULL; i++) { - addr = kallsyms__get_function_start(filename, name); - if (addr) - break; - } + u64 addr = machine__get_kernel_start_addr(machine, &name); if (!addr) return -1; From a2b23bacb315d3873ed90029fd2b68c95de734c0 Mon Sep 17 00:00:00 2001 From: Marcel Holtmann Date: Fri, 20 Jun 2014 12:34:28 +0200 Subject: [PATCH 011/268] Revert "Bluetooth: Add a new PID/VID 0cf3/e005 for AR3012." This reverts commit ca58e594da2486c1d28e7ad547d82266604ec4ce. For some unclear reason this patch tries to add suport for the product ID 0xe005, but it ends up adding product ID 0x3005 to all the tables. This is obviously wrong and causing multiple issues. The original patch seemed to be fine, but what ended up in 3.15 is not what the patch intended. The commit 0a3658cccdf53 is already present and adds support for this hardware. This means only revert of this broken commit is requird. Signed-off-by: Marcel Holtmann Reported-by: Alexander Holler Cc: stable@vger.kernel.org # 3.15.x --- drivers/bluetooth/ath3k.c | 2 -- drivers/bluetooth/btusb.c | 1 - 2 files changed, 3 deletions(-) diff --git a/drivers/bluetooth/ath3k.c b/drivers/bluetooth/ath3k.c index f98380648cb35..f50dffc0374fb 100644 --- a/drivers/bluetooth/ath3k.c +++ b/drivers/bluetooth/ath3k.c @@ -90,7 +90,6 @@ static const struct usb_device_id ath3k_table[] = { { USB_DEVICE(0x0b05, 0x17d0) }, { USB_DEVICE(0x0CF3, 0x0036) }, { USB_DEVICE(0x0CF3, 0x3004) }, - { USB_DEVICE(0x0CF3, 0x3005) }, { USB_DEVICE(0x0CF3, 0x3008) }, { USB_DEVICE(0x0CF3, 0x311D) }, { USB_DEVICE(0x0CF3, 0x311E) }, @@ -140,7 +139,6 @@ static const struct usb_device_id ath3k_blist_tbl[] = { { USB_DEVICE(0x0b05, 0x17d0), .driver_info = BTUSB_ATH3012 }, { USB_DEVICE(0x0CF3, 0x0036), .driver_info = BTUSB_ATH3012 }, { USB_DEVICE(0x0cf3, 0x3004), .driver_info = BTUSB_ATH3012 }, - { USB_DEVICE(0x0cf3, 0x3005), .driver_info = BTUSB_ATH3012 }, { USB_DEVICE(0x0cf3, 0x3008), .driver_info = BTUSB_ATH3012 }, { USB_DEVICE(0x0cf3, 0x311D), .driver_info = BTUSB_ATH3012 }, { USB_DEVICE(0x0cf3, 0x311E), .driver_info = BTUSB_ATH3012 }, diff --git a/drivers/bluetooth/btusb.c b/drivers/bluetooth/btusb.c index a1c80b0c7663d..6250fc2fb93a7 100644 --- a/drivers/bluetooth/btusb.c +++ b/drivers/bluetooth/btusb.c @@ -162,7 +162,6 @@ static const struct usb_device_id blacklist_table[] = { { USB_DEVICE(0x0b05, 0x17d0), .driver_info = BTUSB_ATH3012 }, { USB_DEVICE(0x0cf3, 0x0036), .driver_info = BTUSB_ATH3012 }, { USB_DEVICE(0x0cf3, 0x3004), .driver_info = BTUSB_ATH3012 }, - { USB_DEVICE(0x0cf3, 0x3005), .driver_info = BTUSB_ATH3012 }, { USB_DEVICE(0x0cf3, 0x3008), .driver_info = BTUSB_ATH3012 }, { USB_DEVICE(0x0cf3, 0x311d), .driver_info = BTUSB_ATH3012 }, { USB_DEVICE(0x0cf3, 0x311e), .driver_info = BTUSB_ATH3012 }, From c7262e711ae6e466baeb9ddc21d678c878469b1f Mon Sep 17 00:00:00 2001 From: Johan Hedberg Date: Tue, 17 Jun 2014 13:07:37 +0300 Subject: [PATCH 012/268] Bluetooth: Fix overriding higher security level in SMP When we receive a pairing request or an internal request to start pairing we shouldn't blindly overwrite the existing pending_sec_level value as that may actually be higher than the new one. This patch fixes the SMP code to only overwrite the value in case the new one is higher than the old. Signed-off-by: Johan Hedberg Signed-off-by: Marcel Holtmann --- net/bluetooth/smp.c | 23 ++++++++++++++--------- 1 file changed, 14 insertions(+), 9 deletions(-) diff --git a/net/bluetooth/smp.c b/net/bluetooth/smp.c index f2829a7932e24..0189ec8b68d17 100644 --- a/net/bluetooth/smp.c +++ b/net/bluetooth/smp.c @@ -669,7 +669,7 @@ static u8 smp_cmd_pairing_req(struct l2cap_conn *conn, struct sk_buff *skb) { struct smp_cmd_pairing rsp, *req = (void *) skb->data; struct smp_chan *smp; - u8 key_size, auth; + u8 key_size, auth, sec_level; int ret; BT_DBG("conn %p", conn); @@ -695,7 +695,9 @@ static u8 smp_cmd_pairing_req(struct l2cap_conn *conn, struct sk_buff *skb) /* We didn't start the pairing, so match remote */ auth = req->auth_req; - conn->hcon->pending_sec_level = authreq_to_seclevel(auth); + sec_level = authreq_to_seclevel(auth); + if (sec_level > conn->hcon->pending_sec_level) + conn->hcon->pending_sec_level = sec_level; build_pairing_cmd(conn, req, &rsp, auth); @@ -838,6 +840,7 @@ static u8 smp_cmd_security_req(struct l2cap_conn *conn, struct sk_buff *skb) struct smp_cmd_pairing cp; struct hci_conn *hcon = conn->hcon; struct smp_chan *smp; + u8 sec_level; BT_DBG("conn %p", conn); @@ -847,7 +850,9 @@ static u8 smp_cmd_security_req(struct l2cap_conn *conn, struct sk_buff *skb) if (!(conn->hcon->link_mode & HCI_LM_MASTER)) return SMP_CMD_NOTSUPP; - hcon->pending_sec_level = authreq_to_seclevel(rp->auth_req); + sec_level = authreq_to_seclevel(rp->auth_req); + if (sec_level > hcon->pending_sec_level) + hcon->pending_sec_level = sec_level; if (smp_ltk_encrypt(conn, hcon->pending_sec_level)) return 0; @@ -901,9 +906,12 @@ int smp_conn_security(struct hci_conn *hcon, __u8 sec_level) if (smp_sufficient_security(hcon, sec_level)) return 1; + if (sec_level > hcon->pending_sec_level) + hcon->pending_sec_level = sec_level; + if (hcon->link_mode & HCI_LM_MASTER) - if (smp_ltk_encrypt(conn, sec_level)) - goto done; + if (smp_ltk_encrypt(conn, hcon->pending_sec_level)) + return 0; if (test_and_set_bit(HCI_CONN_LE_SMP_PEND, &hcon->flags)) return 0; @@ -918,7 +926,7 @@ int smp_conn_security(struct hci_conn *hcon, __u8 sec_level) * requires it. */ if (hcon->io_capability != HCI_IO_NO_INPUT_OUTPUT || - sec_level > BT_SECURITY_MEDIUM) + hcon->pending_sec_level > BT_SECURITY_MEDIUM) authreq |= SMP_AUTH_MITM; if (hcon->link_mode & HCI_LM_MASTER) { @@ -937,9 +945,6 @@ int smp_conn_security(struct hci_conn *hcon, __u8 sec_level) set_bit(SMP_FLAG_INITIATOR, &smp->flags); -done: - hcon->pending_sec_level = sec_level; - return 0; } From 581370cc74ea75426421a1f5851ef05e2e995b01 Mon Sep 17 00:00:00 2001 From: Johan Hedberg Date: Tue, 17 Jun 2014 13:07:38 +0300 Subject: [PATCH 013/268] Bluetooth: Refactor authentication method lookup into its own function We'll need to do authentication method lookups from more than one place, so refactor the lookup into its own function. Signed-off-by: Johan Hedberg Signed-off-by: Marcel Holtmann --- net/bluetooth/smp.c | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) diff --git a/net/bluetooth/smp.c b/net/bluetooth/smp.c index 0189ec8b68d17..7156f47206447 100644 --- a/net/bluetooth/smp.c +++ b/net/bluetooth/smp.c @@ -385,6 +385,16 @@ static const u8 gen_method[5][5] = { { CFM_PASSKEY, CFM_PASSKEY, REQ_PASSKEY, JUST_WORKS, OVERLAP }, }; +static u8 get_auth_method(struct smp_chan *smp, u8 local_io, u8 remote_io) +{ + /* If either side has unknown io_caps, use JUST WORKS */ + if (local_io > SMP_IO_KEYBOARD_DISPLAY || + remote_io > SMP_IO_KEYBOARD_DISPLAY) + return JUST_WORKS; + + return gen_method[remote_io][local_io]; +} + static int tk_request(struct l2cap_conn *conn, u8 remote_oob, u8 auth, u8 local_io, u8 remote_io) { @@ -401,14 +411,11 @@ static int tk_request(struct l2cap_conn *conn, u8 remote_oob, u8 auth, BT_DBG("tk_request: auth:%d lcl:%d rem:%d", auth, local_io, remote_io); /* If neither side wants MITM, use JUST WORKS */ - /* If either side has unknown io_caps, use JUST WORKS */ /* Otherwise, look up method from the table */ - if (!(auth & SMP_AUTH_MITM) || - local_io > SMP_IO_KEYBOARD_DISPLAY || - remote_io > SMP_IO_KEYBOARD_DISPLAY) + if (!(auth & SMP_AUTH_MITM)) method = JUST_WORKS; else - method = gen_method[remote_io][local_io]; + method = get_auth_method(smp, local_io, remote_io); /* If not bonding, don't ask user to confirm a Zero TK */ if (!(auth & SMP_AUTH_BONDING) && method == JUST_CFM) From 2ed8f65ca262bca778e60053f667ce11b32db6b8 Mon Sep 17 00:00:00 2001 From: Johan Hedberg Date: Tue, 17 Jun 2014 13:07:39 +0300 Subject: [PATCH 014/268] Bluetooth: Fix rejecting pairing in case of insufficient capabilities If we need an MITM protected connection but the local and remote IO capabilities cannot provide it we should reject the pairing attempt in the appropriate way. This patch adds the missing checks for such a situation to the smp_cmd_pairing_req() and smp_cmd_pairing_rsp() functions. Signed-off-by: Johan Hedberg Signed-off-by: Marcel Holtmann --- net/bluetooth/smp.c | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/net/bluetooth/smp.c b/net/bluetooth/smp.c index 7156f47206447..e33a982161c1d 100644 --- a/net/bluetooth/smp.c +++ b/net/bluetooth/smp.c @@ -706,6 +706,16 @@ static u8 smp_cmd_pairing_req(struct l2cap_conn *conn, struct sk_buff *skb) if (sec_level > conn->hcon->pending_sec_level) conn->hcon->pending_sec_level = sec_level; + /* If we need MITM check that it can be acheived */ + if (conn->hcon->pending_sec_level >= BT_SECURITY_HIGH) { + u8 method; + + method = get_auth_method(smp, conn->hcon->io_capability, + req->io_capability); + if (method == JUST_WORKS || method == JUST_CFM) + return SMP_AUTH_REQUIREMENTS; + } + build_pairing_cmd(conn, req, &rsp, auth); key_size = min(req->max_key_size, rsp.max_key_size); @@ -752,6 +762,16 @@ static u8 smp_cmd_pairing_rsp(struct l2cap_conn *conn, struct sk_buff *skb) if (check_enc_key_size(conn, key_size)) return SMP_ENC_KEY_SIZE; + /* If we need MITM check that it can be acheived */ + if (conn->hcon->pending_sec_level >= BT_SECURITY_HIGH) { + u8 method; + + method = get_auth_method(smp, req->io_capability, + rsp->io_capability); + if (method == JUST_WORKS || method == JUST_CFM) + return SMP_AUTH_REQUIREMENTS; + } + get_random_bytes(smp->prnd, sizeof(smp->prnd)); smp->prsp[0] = SMP_CMD_PAIRING_RSP; From 1d56dc4f5f7cdf0ba99062d974b7586a28fc5cf4 Mon Sep 17 00:00:00 2001 From: Lukasz Rymanowski Date: Tue, 17 Jun 2014 13:04:20 +0200 Subject: [PATCH 015/268] Bluetooth: Fix for ACL disconnect when pairing fails When pairing fails hci_conn refcnt drops below zero. This cause that ACL link is not disconnected when disconnect timeout fires. Probably this is because l2cap_conn_del calls l2cap_chan_del for each channel, and inside l2cap_chan_del conn is dropped. After that loop hci_chan_del is called which also drops conn. Anyway, as it is desrcibed in hci_core.h, it is known that refcnt drops below 0 sometimes and it should be fine. If so, let disconnect link when hci_conn_timeout fires and refcnt is 0 or below. This patch does it. This affects PTS test SM_TC_JW_BV_05_C Logs from scenario: [69713.706227] [6515] pair_device: [69713.706230] [6515] hci_conn_add: hci0 dst 00:1b:dc:06:06:22 [69713.706233] [6515] hci_dev_hold: hci0 orig refcnt 8 [69713.706235] [6515] hci_conn_init_sysfs: conn ffff88021f65a000 [69713.706239] [6515] hci_req_add_ev: hci0 opcode 0x200d plen 25 [69713.706242] [6515] hci_prepare_cmd: skb len 28 [69713.706243] [6515] hci_req_run: length 1 [69713.706248] [6515] hci_conn_hold: hcon ffff88021f65a000 orig refcnt 0 [69713.706251] [6515] hci_dev_put: hci0 orig refcnt 9 [69713.706281] [8909] hci_cmd_work: hci0 cmd_cnt 1 cmd queued 1 [69713.706288] [8909] hci_send_frame: hci0 type 1 len 28 [69713.706290] [8909] hci_send_to_monitor: hdev ffff88021f0c7000 len 28 [69713.706316] [5949] hci_sock_recvmsg: sock ffff8800941a9680, sk ffff88012bf4d000 [69713.706382] [5949] hci_sock_recvmsg: sock ffff8800941a9680, sk ffff88012bf4d000 [69713.711664] [8909] hci_rx_work: hci0 [69713.711668] [8909] hci_send_to_monitor: hdev ffff88021f0c7000 len 6 [69713.711680] [8909] hci_rx_work: hci0 Event packet [69713.711683] [8909] hci_cs_le_create_conn: hci0 status 0x00 [69713.711685] [8909] hci_sent_cmd_data: hci0 opcode 0x200d [69713.711688] [8909] hci_req_cmd_complete: opcode 0x200d status 0x00 [69713.711690] [8909] hci_sent_cmd_data: hci0 opcode 0x200d [69713.711695] [5949] hci_sock_recvmsg: sock ffff8800941a9680, sk ffff88012bf4d000 [69713.711744] [5949] hci_sock_recvmsg: sock ffff8800941a9680, sk ffff88012bf4d000 [69713.818875] [8909] hci_rx_work: hci0 [69713.818889] [8909] hci_send_to_monitor: hdev ffff88021f0c7000 len 21 [69713.818913] [8909] hci_rx_work: hci0 Event packet [69713.818917] [8909] hci_le_conn_complete_evt: hci0 status 0x00 [69713.818922] [8909] hci_send_to_control: len 19 [69713.818927] [5949] hci_sock_recvmsg: sock ffff8800941a9680, sk ffff88012bf4d000 [69713.818938] [8909] hci_conn_add_sysfs: conn ffff88021f65a000 [69713.818975] [6450] bt_sock_poll: sock ffff88005e758500, sk ffff88010323b800 [69713.818981] [6515] hci_sock_recvmsg: sock ffff88005e75a080, sk ffff88010323ac00 ... [69713.819021] [8909] hci_dev_hold: hci0 orig refcnt 10 [69713.819025] [8909] l2cap_connect_cfm: hcon ffff88021f65a000 bdaddr 00:1b:dc:06:06:22 status 0 [69713.819028] [8909] hci_chan_create: hci0 hcon ffff88021f65a000 [69713.819031] [8909] l2cap_conn_add: hcon ffff88021f65a000 conn ffff880221005c00 hchan ffff88020d60b1c0 [69713.819034] [8909] l2cap_conn_ready: conn ffff880221005c00 [69713.819036] [5949] hci_sock_recvmsg: sock ffff8800941a9680, sk ffff88012bf4d000 [69713.819037] [8909] smp_conn_security: conn ffff880221005c00 hcon ffff88021f65a000 level 0x02 [69713.819039] [8909] smp_chan_create: [69713.819041] [8909] hci_conn_hold: hcon ffff88021f65a000 orig refcnt 1 [69713.819043] [8909] smp_send_cmd: code 0x01 [69713.819045] [8909] hci_send_acl: hci0 chan ffff88020d60b1c0 flags 0x0000 [69713.819046] [5949] hci_sock_recvmsg: sock ffff8800941a9900, sk ffff88012bf4e800 [69713.819049] [8909] hci_queue_acl: hci0 nonfrag skb ffff88005157c100 len 15 [69713.819055] [5949] hci_sock_recvmsg: sock ffff8800941a9900, sk ffff88012bf4e800 [69713.819057] [8909] l2cap_le_conn_ready: [69713.819064] [8909] l2cap_chan_create: chan ffff88005ede2c00 [69713.819066] [8909] l2cap_chan_hold: chan ffff88005ede2c00 orig refcnt 1 [69713.819069] [8909] l2cap_sock_init: sk ffff88005ede5800 [69713.819072] [8909] bt_accept_enqueue: parent ffff880160356000, sk ffff88005ede5800 [69713.819074] [8909] __l2cap_chan_add: conn ffff880221005c00, psm 0x00, dcid 0x0004 [69713.819076] [8909] l2cap_chan_hold: chan ffff88005ede2c00 orig refcnt 2 [69713.819078] [8909] hci_conn_hold: hcon ffff88021f65a000 orig refcnt 2 [69713.819080] [8909] smp_conn_security: conn ffff880221005c00 hcon ffff88021f65a000 level 0x01 [69713.819082] [8909] l2cap_sock_ready_cb: sk ffff88005ede5800, parent ffff880160356000 [69713.819086] [8909] le_pairing_complete_cb: status 0 [69713.819091] [8909] hci_tx_work: hci0 acl 10 sco 8 le 0 [69713.819093] [8909] hci_sched_acl: hci0 [69713.819094] [8909] hci_sched_sco: hci0 [69713.819096] [8909] hci_sched_esco: hci0 [69713.819098] [8909] hci_sched_le: hci0 [69713.819099] [8909] hci_chan_sent: hci0 [69713.819101] [8909] hci_chan_sent: chan ffff88020d60b1c0 quote 10 [69713.819104] [8909] hci_sched_le: chan ffff88020d60b1c0 skb ffff88005157c100 len 15 priority 7 [69713.819106] [8909] hci_send_frame: hci0 type 2 len 15 [69713.819108] [8909] hci_send_to_monitor: hdev ffff88021f0c7000 len 15 [69713.819119] [8909] hci_chan_sent: hci0 [69713.819121] [8909] hci_prio_recalculate: hci0 [69713.819123] [8909] process_pending_rx: [69713.819226] [6450] hci_sock_recvmsg: sock ffff88005e758780, sk ffff88010323d400 ... [69713.822022] [6450] l2cap_sock_accept: sk ffff880160356000 timeo 0 [69713.822024] [6450] bt_accept_dequeue: parent ffff880160356000 [69713.822026] [6450] bt_accept_unlink: sk ffff88005ede5800 state 1 [69713.822028] [6450] l2cap_sock_accept: new socket ffff88005ede5800 [69713.822368] [6450] l2cap_sock_getname: sock ffff8800941ab700, sk ffff88005ede5800 [69713.822375] [6450] l2cap_sock_getsockopt: sk ffff88005ede5800 [69713.822383] [6450] l2cap_sock_getname: sock ffff8800941ab700, sk ffff88005ede5800 [69713.822414] [6450] bt_sock_poll: sock ffff8800941ab700, sk ffff88005ede5800 ... [69713.823255] [6450] l2cap_sock_getname: sock ffff8800941ab700, sk ffff88005ede5800 [69713.823259] [6450] l2cap_sock_getsockopt: sk ffff88005ede5800 [69713.824322] [6450] l2cap_sock_getname: sock ffff8800941ab700, sk ffff88005ede5800 [69713.824330] [6450] l2cap_sock_getsockopt: sk ffff88005ede5800 [69713.825029] [6450] bt_sock_poll: sock ffff88005e758500, sk ffff88010323b800 ... [69713.825187] [6450] l2cap_sock_sendmsg: sock ffff8800941ab700, sk ffff88005ede5800 [69713.825189] [6450] bt_sock_wait_ready: sk ffff88005ede5800 [69713.825192] [6450] l2cap_create_basic_pdu: chan ffff88005ede2c00 len 3 [69713.825196] [6450] l2cap_do_send: chan ffff88005ede2c00, skb ffff880160b0b500 len 7 priority 0 [69713.825199] [6450] hci_send_acl: hci0 chan ffff88020d60b1c0 flags 0x0000 [69713.825201] [6450] hci_queue_acl: hci0 nonfrag skb ffff880160b0b500 len 11 [69713.825210] [8909] hci_tx_work: hci0 acl 9 sco 8 le 0 [69713.825213] [8909] hci_sched_acl: hci0 [69713.825214] [8909] hci_sched_sco: hci0 [69713.825216] [8909] hci_sched_esco: hci0 [69713.825217] [8909] hci_sched_le: hci0 [69713.825219] [8909] hci_chan_sent: hci0 [69713.825221] [8909] hci_chan_sent: chan ffff88020d60b1c0 quote 9 [69713.825223] [8909] hci_sched_le: chan ffff88020d60b1c0 skb ffff880160b0b500 len 11 priority 0 [69713.825225] [8909] hci_send_frame: hci0 type 2 len 11 [69713.825227] [8909] hci_send_to_monitor: hdev ffff88021f0c7000 len 11 [69713.825242] [8909] hci_chan_sent: hci0 [69713.825253] [5949] hci_sock_recvmsg: sock ffff8800941a9680, sk ffff88012bf4d000 [69713.825253] [8909] hci_prio_recalculate: hci0 [69713.825292] [5949] hci_sock_recvmsg: sock ffff8800941a9680, sk ffff88012bf4d000 [69713.825768] [6450] bt_sock_poll: sock ffff88005e758500, sk ffff88010323b800 ... [69713.866902] [8909] hci_rx_work: hci0 [69713.866921] [8909] hci_send_to_monitor: hdev ffff88021f0c7000 len 7 [69713.866928] [8909] hci_rx_work: hci0 Event packet [69713.866931] [8909] hci_num_comp_pkts_evt: hci0 num_hndl 1 [69713.866937] [8909] hci_tx_work: hci0 acl 9 sco 8 le 0 [69713.866939] [5949] hci_sock_recvmsg: sock ffff8800941a9680, sk ffff88012bf4d000 [69713.866940] [8909] hci_sched_acl: hci0 ... [69713.866944] [8909] hci_sched_le: hci0 [69713.866953] [8909] hci_chan_sent: hci0 [69713.866997] [5949] hci_sock_recvmsg: sock ffff8800941a9680, sk ffff88012bf4d000 [69713.867840] [28074] hci_rx_work: hci0 [69713.867844] [28074] hci_send_to_monitor: hdev ffff88021f0c7000 len 7 [69713.867850] [28074] hci_rx_work: hci0 Event packet [69713.867853] [28074] hci_num_comp_pkts_evt: hci0 num_hndl 1 [69713.867857] [5949] hci_sock_recvmsg: sock ffff8800941a9680, sk ffff88012bf4d000 [69713.867858] [28074] hci_tx_work: hci0 acl 10 sco 8 le 0 [69713.867860] [28074] hci_sched_acl: hci0 [69713.867861] [28074] hci_sched_sco: hci0 [69713.867862] [28074] hci_sched_esco: hci0 [69713.867863] [28074] hci_sched_le: hci0 [69713.867865] [28074] hci_chan_sent: hci0 [69713.867888] [5949] hci_sock_recvmsg: sock ffff8800941a9680, sk ffff88012bf4d000 [69714.145661] [8909] hci_rx_work: hci0 [69714.145666] [8909] hci_send_to_monitor: hdev ffff88021f0c7000 len 10 [69714.145676] [8909] hci_rx_work: hci0 ACL data packet [69714.145679] [8909] hci_acldata_packet: hci0 len 6 handle 0x002d flags 0x0002 [69714.145681] [8909] hci_conn_enter_active_mode: hcon ffff88021f65a000 mode 0 [69714.145683] [8909] l2cap_recv_acldata: conn ffff880221005c00 len 6 flags 0x2 [69714.145693] [8909] l2cap_recv_frame: len 2, cid 0x0006 [69714.145696] [8909] hci_send_to_control: len 14 [69714.145710] [8909] smp_chan_destroy: [69714.145713] [8909] pairing_complete: status 3 [69714.145714] [8909] cmd_complete: sock ffff88010323ac00 [69714.145717] [8909] hci_conn_drop: hcon ffff88021f65a000 orig refcnt 3 [69714.145719] [5949] hci_sock_recvmsg: sock ffff8800941a9680, sk ffff88012bf4d000 [69714.145720] [6450] bt_sock_poll: sock ffff88005e758500, sk ffff88010323b800 [69714.145722] [6515] hci_sock_recvmsg: sock ffff88005e75a080, sk ffff88010323ac00 [69714.145724] [6450] bt_sock_poll: sock ffff8801db6b4f00, sk ffff880160351c00 ... [69714.145735] [6515] hci_sock_recvmsg: sock ffff88005e75a080, sk ffff88010323ac00 [69714.145737] [8909] hci_conn_drop: hcon ffff88021f65a000 orig refcnt 2 [69714.145739] [8909] l2cap_conn_del: hcon ffff88021f65a000 conn ffff880221005c00, err 13 [69714.145740] [6450] bt_sock_poll: sock ffff8801db6b5400, sk ffff88021e775000 [69714.145743] [6450] bt_sock_poll: sock ffff8801db6b5e00, sk ffff880160356000 [69714.145744] [8909] l2cap_chan_hold: chan ffff88005ede2c00 orig refcnt 3 [69714.145746] [6450] bt_sock_poll: sock ffff8800941ab700, sk ffff88005ede5800 [69714.145748] [8909] l2cap_chan_del: chan ffff88005ede2c00, conn ffff880221005c00, err 13 [69714.145749] [8909] l2cap_chan_put: chan ffff88005ede2c00 orig refcnt 4 [69714.145751] [8909] hci_conn_drop: hcon ffff88021f65a000 orig refcnt 1 [69714.145754] [6450] bt_sock_poll: sock ffff8800941ab700, sk ffff88005ede5800 [69714.145756] [8909] l2cap_chan_put: chan ffff88005ede2c00 orig refcnt 3 [69714.145759] [8909] hci_chan_del: hci0 hcon ffff88021f65a000 chan ffff88020d60b1c0 [69714.145766] [5949] hci_sock_recvmsg: sock ffff8800941a9680, sk ffff88012bf4d000 [69714.145787] [6515] hci_sock_release: sock ffff88005e75a080 sk ffff88010323ac00 [69714.146002] [6450] hci_sock_recvmsg: sock ffff88005e758780, sk ffff88010323d400 [69714.150795] [6450] l2cap_sock_release: sock ffff8800941ab700, sk ffff88005ede5800 [69714.150799] [6450] l2cap_sock_shutdown: sock ffff8800941ab700, sk ffff88005ede5800 [69714.150802] [6450] l2cap_chan_close: chan ffff88005ede2c00 state BT_CLOSED [69714.150805] [6450] l2cap_sock_kill: sk ffff88005ede5800 state BT_CLOSED [69714.150806] [6450] l2cap_chan_put: chan ffff88005ede2c00 orig refcnt 2 [69714.150808] [6450] l2cap_sock_destruct: sk ffff88005ede5800 [69714.150809] [6450] l2cap_chan_put: chan ffff88005ede2c00 orig refcnt 1 [69714.150811] [6450] l2cap_chan_destroy: chan ffff88005ede2c00 [69714.150970] [6450] bt_sock_poll: sock ffff88005e758500, sk ffff88010323b800 ... [69714.151991] [8909] hci_conn_drop: hcon ffff88021f65a000 orig refcnt 0 [69716.150339] [8909] hci_conn_timeout: hcon ffff88021f65a000 state BT_CONNECTED, refcnt -1 Signed-off-by: Lukasz Rymanowski Signed-off-by: Marcel Holtmann --- net/bluetooth/hci_conn.c | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/net/bluetooth/hci_conn.c b/net/bluetooth/hci_conn.c index ca01d18618549..a7a27bc2c0b1d 100644 --- a/net/bluetooth/hci_conn.c +++ b/net/bluetooth/hci_conn.c @@ -289,10 +289,20 @@ static void hci_conn_timeout(struct work_struct *work) { struct hci_conn *conn = container_of(work, struct hci_conn, disc_work.work); + int refcnt = atomic_read(&conn->refcnt); BT_DBG("hcon %p state %s", conn, state_to_string(conn->state)); - if (atomic_read(&conn->refcnt)) + WARN_ON(refcnt < 0); + + /* FIXME: It was observed that in pairing failed scenario, refcnt + * drops below 0. Probably this is because l2cap_conn_del calls + * l2cap_chan_del for each channel, and inside l2cap_chan_del conn is + * dropped. After that loop hci_chan_del is called which also drops + * conn. For now make sure that ACL is alive if refcnt is higher then 0, + * otherwise drop it. + */ + if (refcnt > 0) return; switch (conn->state) { From 744462a91ecf5d1ec64857488bf99000ef626921 Mon Sep 17 00:00:00 2001 From: Max Stepanov Date: Tue, 10 Jun 2014 20:00:08 +0300 Subject: [PATCH 016/268] mac80211: WEP extra head/tail room in ieee80211_send_auth After skb allocation and call to ieee80211_wep_encrypt in ieee80211_send_auth the flow fails with a warning in ieee80211_wep_add_iv on verification of available head/tailroom needed for WEP_IV and WEP_ICV. Signed-off-by: Max Stepanov Signed-off-by: Emmanuel Grumbach Signed-off-by: Johannes Berg --- net/mac80211/util.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/net/mac80211/util.c b/net/mac80211/util.c index 6886601afe1c7..a6cda52ed9203 100644 --- a/net/mac80211/util.c +++ b/net/mac80211/util.c @@ -1096,11 +1096,12 @@ void ieee80211_send_auth(struct ieee80211_sub_if_data *sdata, int err; /* 24 + 6 = header + auth_algo + auth_transaction + status_code */ - skb = dev_alloc_skb(local->hw.extra_tx_headroom + 24 + 6 + extra_len); + skb = dev_alloc_skb(local->hw.extra_tx_headroom + IEEE80211_WEP_IV_LEN + + 24 + 6 + extra_len + IEEE80211_WEP_ICV_LEN); if (!skb) return; - skb_reserve(skb, local->hw.extra_tx_headroom); + skb_reserve(skb, local->hw.extra_tx_headroom + IEEE80211_WEP_IV_LEN); mgmt = (struct ieee80211_mgmt *) skb_put(skb, 24 + 6); memset(mgmt, 0, 24 + 6); From e33e2241e272eddc38339692500bd1c7d8753a77 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Mon, 23 Jun 2014 11:06:16 +0200 Subject: [PATCH 017/268] Revert "cfg80211: Use 5MHz bandwidth by default when checking usable channels" This reverts commit 8eca1fb692cc9557f386eddce75c300a3855d11a. Felix notes that this broke regulatory, leaving channel 12 open for AP operation in the US regulatory domain where it isn't permitted. Link: http://mid.gmane.org/53A6C0FF.9090104@openwrt.org Reported-by: Felix Fietkau Signed-off-by: Johannes Berg --- net/wireless/reg.c | 22 +++++++--------------- 1 file changed, 7 insertions(+), 15 deletions(-) diff --git a/net/wireless/reg.c b/net/wireless/reg.c index 558b0e3a02d82..1afdf45db38f2 100644 --- a/net/wireless/reg.c +++ b/net/wireless/reg.c @@ -935,7 +935,7 @@ freq_reg_info_regd(struct wiphy *wiphy, u32 center_freq, if (!band_rule_found) band_rule_found = freq_in_rule_band(fr, center_freq); - bw_fits = reg_does_bw_fit(fr, center_freq, MHZ_TO_KHZ(5)); + bw_fits = reg_does_bw_fit(fr, center_freq, MHZ_TO_KHZ(20)); if (band_rule_found && bw_fits) return rr; @@ -1019,10 +1019,10 @@ static void chan_reg_rule_print_dbg(const struct ieee80211_regdomain *regd, } #endif -/* Find an ieee80211_reg_rule such that a 5MHz channel with frequency - * chan->center_freq fits there. - * If there is no such reg_rule, disable the channel, otherwise set the - * flags corresponding to the bandwidths allowed in the particular reg_rule +/* + * Note that right now we assume the desired channel bandwidth + * is always 20 MHz for each individual channel (HT40 uses 20 MHz + * per channel, the primary and the extension channel). */ static void handle_channel(struct wiphy *wiphy, enum nl80211_reg_initiator initiator, @@ -1083,12 +1083,8 @@ static void handle_channel(struct wiphy *wiphy, if (reg_rule->flags & NL80211_RRF_AUTO_BW) max_bandwidth_khz = reg_get_max_bandwidth(regd, reg_rule); - if (max_bandwidth_khz < MHZ_TO_KHZ(10)) - bw_flags = IEEE80211_CHAN_NO_10MHZ; - if (max_bandwidth_khz < MHZ_TO_KHZ(20)) - bw_flags |= IEEE80211_CHAN_NO_20MHZ; if (max_bandwidth_khz < MHZ_TO_KHZ(40)) - bw_flags |= IEEE80211_CHAN_NO_HT40; + bw_flags = IEEE80211_CHAN_NO_HT40; if (max_bandwidth_khz < MHZ_TO_KHZ(80)) bw_flags |= IEEE80211_CHAN_NO_80MHZ; if (max_bandwidth_khz < MHZ_TO_KHZ(160)) @@ -1522,12 +1518,8 @@ static void handle_channel_custom(struct wiphy *wiphy, if (reg_rule->flags & NL80211_RRF_AUTO_BW) max_bandwidth_khz = reg_get_max_bandwidth(regd, reg_rule); - if (max_bandwidth_khz < MHZ_TO_KHZ(10)) - bw_flags = IEEE80211_CHAN_NO_10MHZ; - if (max_bandwidth_khz < MHZ_TO_KHZ(20)) - bw_flags |= IEEE80211_CHAN_NO_20MHZ; if (max_bandwidth_khz < MHZ_TO_KHZ(40)) - bw_flags |= IEEE80211_CHAN_NO_HT40; + bw_flags = IEEE80211_CHAN_NO_HT40; if (max_bandwidth_khz < MHZ_TO_KHZ(80)) bw_flags |= IEEE80211_CHAN_NO_80MHZ; if (max_bandwidth_khz < MHZ_TO_KHZ(160)) From 0ce12026d6ea4a24d52ddcde36b9643f2e26d560 Mon Sep 17 00:00:00 2001 From: Eliad Peller Date: Tue, 17 Jun 2014 13:07:02 +0300 Subject: [PATCH 018/268] cfg80211: fix elapsed_jiffies calculation MAX_JIFFY_OFFSET has no meaning when calculating the elapsed jiffies, as jiffies run out until ULONG_MAX. This miscalculation results in erroneous values in case of a wrap-around. Signed-off-by: Eliad Peller Signed-off-by: Johannes Berg --- net/wireless/core.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/wireless/core.h b/net/wireless/core.h index e9afbf10e756b..7e3a3cef7df93 100644 --- a/net/wireless/core.h +++ b/net/wireless/core.h @@ -424,7 +424,7 @@ static inline unsigned int elapsed_jiffies_msecs(unsigned long start) if (end >= start) return jiffies_to_msecs(end - start); - return jiffies_to_msecs(end + (MAX_JIFFY_OFFSET - start) + 1); + return jiffies_to_msecs(end + (ULONG_MAX - start) + 1); } void From 48439d501e3d9e8634bdc0c418e066870039599d Mon Sep 17 00:00:00 2001 From: Loic Poulain Date: Mon, 23 Jun 2014 17:42:44 +0200 Subject: [PATCH 019/268] Bluetooth: Ignore H5 non-link packets in non-active state When detecting a non-link packet, h5_reset_rx() frees the Rx skb. Not returning after that will cause the upcoming h5_rx_payload() call to dereference a now NULL Rx skb and trigger a kernel oops. Signed-off-by: Loic Poulain Signed-off-by: Marcel Holtmann Cc: stable@vger.kernel.org --- drivers/bluetooth/hci_h5.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/bluetooth/hci_h5.c b/drivers/bluetooth/hci_h5.c index 04680ead9275c..fede8ca7147c8 100644 --- a/drivers/bluetooth/hci_h5.c +++ b/drivers/bluetooth/hci_h5.c @@ -406,6 +406,7 @@ static int h5_rx_3wire_hdr(struct hci_uart *hu, unsigned char c) H5_HDR_PKT_TYPE(hdr) != HCI_3WIRE_LINK_PKT) { BT_ERR("Non-link packet received in non-active state"); h5_reset_rx(h5); + return 0; } h5->rx_func = h5_rx_payload; From 546a9d8519ed137b2804a3f5a3659003039dd49c Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Thu, 19 Jun 2014 14:57:10 -0700 Subject: [PATCH 020/268] rcu: Export debug_init_rcu_head() and and debug_init_rcu_head() Currently, call_rcu() relies on implicit allocation and initialization for the debug-objects handling of RCU callbacks. If you hammer the kernel hard enough with Sasha's modified version of trinity, you can end up with the sl*b allocators recursing into themselves via this implicit call_rcu() allocation. This commit therefore exports the debug_init_rcu_head() and debug_rcu_head_free() functions, which permits the allocators to allocated and pre-initialize the debug-objects information, so that there no longer any need for call_rcu() to do that initialization, which in turn prevents the recursion into the memory allocators. Reported-by: Sasha Levin Suggested-by: Thomas Gleixner Signed-off-by: Paul E. McKenney Acked-by: Thomas Gleixner Looks-good-to: Christoph Lameter --- include/linux/rcupdate.h | 10 ++++++++++ kernel/rcu/update.c | 4 ++-- 2 files changed, 12 insertions(+), 2 deletions(-) diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index 5a75d19aa661e..13bbfbde41b94 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -358,9 +358,19 @@ void wait_rcu_gp(call_rcu_func_t crf); * initialization. */ #ifdef CONFIG_DEBUG_OBJECTS_RCU_HEAD +void init_rcu_head(struct rcu_head *head); +void destroy_rcu_head(struct rcu_head *head); void init_rcu_head_on_stack(struct rcu_head *head); void destroy_rcu_head_on_stack(struct rcu_head *head); #else /* !CONFIG_DEBUG_OBJECTS_RCU_HEAD */ +static inline void init_rcu_head(struct rcu_head *head) +{ +} + +static inline void destroy_rcu_head(struct rcu_head *head) +{ +} + static inline void init_rcu_head_on_stack(struct rcu_head *head) { } diff --git a/kernel/rcu/update.c b/kernel/rcu/update.c index a2aeb4df0f603..0fb691e63ce6f 100644 --- a/kernel/rcu/update.c +++ b/kernel/rcu/update.c @@ -200,12 +200,12 @@ void wait_rcu_gp(call_rcu_func_t crf) EXPORT_SYMBOL_GPL(wait_rcu_gp); #ifdef CONFIG_DEBUG_OBJECTS_RCU_HEAD -static inline void debug_init_rcu_head(struct rcu_head *head) +void init_rcu_head(struct rcu_head *head) { debug_object_init(head, &rcuhead_debug_descr); } -static inline void debug_rcu_head_free(struct rcu_head *head) +void destroy_rcu_head(struct rcu_head *head) { debug_object_free(head, &rcuhead_debug_descr); } From 4a81e8328d3791a4f99bf5b436d050f6dc5ffea3 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Fri, 20 Jun 2014 16:49:01 -0700 Subject: [PATCH 021/268] rcu: Reduce overhead of cond_resched() checks for RCU Commit ac1bea85781e (Make cond_resched() report RCU quiescent states) fixed a problem where a CPU looping in the kernel with but one runnable task would give RCU CPU stall warnings, even if the in-kernel loop contained cond_resched() calls. Unfortunately, in so doing, it introduced performance regressions in Anton Blanchard's will-it-scale "open1" test. The problem appears to be not so much the increased cond_resched() path length as an increase in the rate at which grace periods complete, which increased per-update grace-period overhead. This commit takes a different approach to fixing this bug, mainly by moving the RCU-visible quiescent state from cond_resched() to rcu_note_context_switch(), and by further reducing the check to a simple non-zero test of a single per-CPU variable. However, this approach requires that the force-quiescent-state processing send resched IPIs to the offending CPUs. These will be sent only once the grace period has reached an age specified by the boot/sysfs parameter rcutree.jiffies_till_sched_qs, or once the grace period reaches an age halfway to the point at which RCU CPU stall warnings will be emitted, whichever comes first. Reported-by: Dave Hansen Signed-off-by: Paul E. McKenney Cc: Andi Kleen Cc: Christoph Lameter Cc: Mike Galbraith Cc: Eric Dumazet Reviewed-by: Josh Triplett [ paulmck: Made rcu_momentary_dyntick_idle() as suggested by the ktest build robot. Also fixed smp_mb() comment as noted by Oleg Nesterov. ] Merge with e552592e (Reduce overhead of cond_resched() checks for RCU) Signed-off-by: Paul E. McKenney --- Documentation/kernel-parameters.txt | 6 ++ include/linux/rcupdate.h | 36 ------- kernel/rcu/tree.c | 140 ++++++++++++++++++++++------ kernel/rcu/tree.h | 6 +- kernel/rcu/tree_plugin.h | 2 +- kernel/rcu/update.c | 18 ---- kernel/sched/core.c | 7 +- 7 files changed, 125 insertions(+), 90 deletions(-) diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt index 6eaa9cdb7094b..910c3829f81d0 100644 --- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt @@ -2785,6 +2785,12 @@ bytes respectively. Such letter suffixes can also be entirely omitted. leaf rcu_node structure. Useful for very large systems. + rcutree.jiffies_till_sched_qs= [KNL] + Set required age in jiffies for a + given grace period before RCU starts + soliciting quiescent-state help from + rcu_note_context_switch(). + rcutree.jiffies_till_first_fqs= [KNL] Set delay from grace-period initialization to first attempt to force quiescent states. diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index 13bbfbde41b94..6a94cc8b1ca08 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -44,7 +44,6 @@ #include #include #include -#include #include extern int rcu_expedited; /* for sysctl */ @@ -299,41 +298,6 @@ static inline void rcu_user_hooks_switch(struct task_struct *prev, bool __rcu_is_watching(void); #endif /* #if defined(CONFIG_DEBUG_LOCK_ALLOC) || defined(CONFIG_RCU_TRACE) || defined(CONFIG_SMP) */ -/* - * Hooks for cond_resched() and friends to avoid RCU CPU stall warnings. - */ - -#define RCU_COND_RESCHED_LIM 256 /* ms vs. 100s of ms. */ -DECLARE_PER_CPU(int, rcu_cond_resched_count); -void rcu_resched(void); - -/* - * Is it time to report RCU quiescent states? - * - * Note unsynchronized access to rcu_cond_resched_count. Yes, we might - * increment some random CPU's count, and possibly also load the result from - * yet another CPU's count. We might even clobber some other CPU's attempt - * to zero its counter. This is all OK because the goal is not precision, - * but rather reasonable amortization of rcu_note_context_switch() overhead - * and extremely high probability of avoiding RCU CPU stall warnings. - * Note that this function has to be preempted in just the wrong place, - * many thousands of times in a row, for anything bad to happen. - */ -static inline bool rcu_should_resched(void) -{ - return raw_cpu_inc_return(rcu_cond_resched_count) >= - RCU_COND_RESCHED_LIM; -} - -/* - * Report quiscent states to RCU if it is time to do so. - */ -static inline void rcu_cond_resched(void) -{ - if (unlikely(rcu_should_resched())) - rcu_resched(); -} - /* * Infrastructure to implement the synchronize_() primitives in * TREE_RCU and rcu_barrier_() primitives in TINY_RCU. diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c index f1ba77363fbb9..625d0b0cd75a0 100644 --- a/kernel/rcu/tree.c +++ b/kernel/rcu/tree.c @@ -206,6 +206,70 @@ void rcu_bh_qs(int cpu) rdp->passed_quiesce = 1; } +static DEFINE_PER_CPU(int, rcu_sched_qs_mask); + +static DEFINE_PER_CPU(struct rcu_dynticks, rcu_dynticks) = { + .dynticks_nesting = DYNTICK_TASK_EXIT_IDLE, + .dynticks = ATOMIC_INIT(1), +#ifdef CONFIG_NO_HZ_FULL_SYSIDLE + .dynticks_idle_nesting = DYNTICK_TASK_NEST_VALUE, + .dynticks_idle = ATOMIC_INIT(1), +#endif /* #ifdef CONFIG_NO_HZ_FULL_SYSIDLE */ +}; + +/* + * Let the RCU core know that this CPU has gone through the scheduler, + * which is a quiescent state. This is called when the need for a + * quiescent state is urgent, so we burn an atomic operation and full + * memory barriers to let the RCU core know about it, regardless of what + * this CPU might (or might not) do in the near future. + * + * We inform the RCU core by emulating a zero-duration dyntick-idle + * period, which we in turn do by incrementing the ->dynticks counter + * by two. + */ +static void rcu_momentary_dyntick_idle(void) +{ + unsigned long flags; + struct rcu_data *rdp; + struct rcu_dynticks *rdtp; + int resched_mask; + struct rcu_state *rsp; + + local_irq_save(flags); + + /* + * Yes, we can lose flag-setting operations. This is OK, because + * the flag will be set again after some delay. + */ + resched_mask = raw_cpu_read(rcu_sched_qs_mask); + raw_cpu_write(rcu_sched_qs_mask, 0); + + /* Find the flavor that needs a quiescent state. */ + for_each_rcu_flavor(rsp) { + rdp = raw_cpu_ptr(rsp->rda); + if (!(resched_mask & rsp->flavor_mask)) + continue; + smp_mb(); /* rcu_sched_qs_mask before cond_resched_completed. */ + if (ACCESS_ONCE(rdp->mynode->completed) != + ACCESS_ONCE(rdp->cond_resched_completed)) + continue; + + /* + * Pretend to be momentarily idle for the quiescent state. + * This allows the grace-period kthread to record the + * quiescent state, with no need for this CPU to do anything + * further. + */ + rdtp = this_cpu_ptr(&rcu_dynticks); + smp_mb__before_atomic(); /* Earlier stuff before QS. */ + atomic_add(2, &rdtp->dynticks); /* QS. */ + smp_mb__after_atomic(); /* Later stuff after QS. */ + break; + } + local_irq_restore(flags); +} + /* * Note a context switch. This is a quiescent state for RCU-sched, * and requires special handling for preemptible RCU. @@ -216,19 +280,12 @@ void rcu_note_context_switch(int cpu) trace_rcu_utilization(TPS("Start context switch")); rcu_sched_qs(cpu); rcu_preempt_note_context_switch(cpu); + if (unlikely(raw_cpu_read(rcu_sched_qs_mask))) + rcu_momentary_dyntick_idle(); trace_rcu_utilization(TPS("End context switch")); } EXPORT_SYMBOL_GPL(rcu_note_context_switch); -static DEFINE_PER_CPU(struct rcu_dynticks, rcu_dynticks) = { - .dynticks_nesting = DYNTICK_TASK_EXIT_IDLE, - .dynticks = ATOMIC_INIT(1), -#ifdef CONFIG_NO_HZ_FULL_SYSIDLE - .dynticks_idle_nesting = DYNTICK_TASK_NEST_VALUE, - .dynticks_idle = ATOMIC_INIT(1), -#endif /* #ifdef CONFIG_NO_HZ_FULL_SYSIDLE */ -}; - static long blimit = 10; /* Maximum callbacks per rcu_do_batch. */ static long qhimark = 10000; /* If this many pending, ignore blimit. */ static long qlowmark = 100; /* Once only this many pending, use blimit. */ @@ -243,6 +300,13 @@ static ulong jiffies_till_next_fqs = ULONG_MAX; module_param(jiffies_till_first_fqs, ulong, 0644); module_param(jiffies_till_next_fqs, ulong, 0644); +/* + * How long the grace period must be before we start recruiting + * quiescent-state help from rcu_note_context_switch(). + */ +static ulong jiffies_till_sched_qs = HZ / 20; +module_param(jiffies_till_sched_qs, ulong, 0644); + static bool rcu_start_gp_advanced(struct rcu_state *rsp, struct rcu_node *rnp, struct rcu_data *rdp); static void force_qs_rnp(struct rcu_state *rsp, @@ -853,6 +917,7 @@ static int rcu_implicit_dynticks_qs(struct rcu_data *rdp, bool *isidle, unsigned long *maxj) { unsigned int curr; + int *rcrmp; unsigned int snap; curr = (unsigned int)atomic_add_return(0, &rdp->dynticks->dynticks); @@ -893,27 +958,43 @@ static int rcu_implicit_dynticks_qs(struct rcu_data *rdp, } /* - * There is a possibility that a CPU in adaptive-ticks state - * might run in the kernel with the scheduling-clock tick disabled - * for an extended time period. Invoke rcu_kick_nohz_cpu() to - * force the CPU to restart the scheduling-clock tick in this - * CPU is in this state. - */ - rcu_kick_nohz_cpu(rdp->cpu); - - /* - * Alternatively, the CPU might be running in the kernel - * for an extended period of time without a quiescent state. - * Attempt to force the CPU through the scheduler to gain the - * needed quiescent state, but only if the grace period has gone - * on for an uncommonly long time. If there are many stuck CPUs, - * we will beat on the first one until it gets unstuck, then move - * to the next. Only do this for the primary flavor of RCU. + * A CPU running for an extended time within the kernel can + * delay RCU grace periods. When the CPU is in NO_HZ_FULL mode, + * even context-switching back and forth between a pair of + * in-kernel CPU-bound tasks cannot advance grace periods. + * So if the grace period is old enough, make the CPU pay attention. + * Note that the unsynchronized assignments to the per-CPU + * rcu_sched_qs_mask variable are safe. Yes, setting of + * bits can be lost, but they will be set again on the next + * force-quiescent-state pass. So lost bit sets do not result + * in incorrect behavior, merely in a grace period lasting + * a few jiffies longer than it might otherwise. Because + * there are at most four threads involved, and because the + * updates are only once every few jiffies, the probability of + * lossage (and thus of slight grace-period extension) is + * quite low. + * + * Note that if the jiffies_till_sched_qs boot/sysfs parameter + * is set too high, we override with half of the RCU CPU stall + * warning delay. */ - if (rdp->rsp == rcu_state_p && + rcrmp = &per_cpu(rcu_sched_qs_mask, rdp->cpu); + if (ULONG_CMP_GE(jiffies, + rdp->rsp->gp_start + jiffies_till_sched_qs) || ULONG_CMP_GE(jiffies, rdp->rsp->jiffies_resched)) { - rdp->rsp->jiffies_resched += 5; - resched_cpu(rdp->cpu); + if (!(ACCESS_ONCE(*rcrmp) & rdp->rsp->flavor_mask)) { + ACCESS_ONCE(rdp->cond_resched_completed) = + ACCESS_ONCE(rdp->mynode->completed); + smp_mb(); /* ->cond_resched_completed before *rcrmp. */ + ACCESS_ONCE(*rcrmp) = + ACCESS_ONCE(*rcrmp) + rdp->rsp->flavor_mask; + resched_cpu(rdp->cpu); /* Force CPU into scheduler. */ + rdp->rsp->jiffies_resched += 5; /* Enable beating. */ + } else if (ULONG_CMP_GE(jiffies, rdp->rsp->jiffies_resched)) { + /* Time to beat on that CPU again! */ + resched_cpu(rdp->cpu); /* Force CPU into scheduler. */ + rdp->rsp->jiffies_resched += 5; /* Re-enable beating. */ + } } return 0; @@ -3491,6 +3572,7 @@ static void __init rcu_init_one(struct rcu_state *rsp, "rcu_node_fqs_1", "rcu_node_fqs_2", "rcu_node_fqs_3" }; /* Match MAX_RCU_LVLS */ + static u8 fl_mask = 0x1; int cpustride = 1; int i; int j; @@ -3509,6 +3591,8 @@ static void __init rcu_init_one(struct rcu_state *rsp, for (i = 1; i < rcu_num_lvls; i++) rsp->level[i] = rsp->level[i - 1] + rsp->levelcnt[i - 1]; rcu_init_levelspread(rsp); + rsp->flavor_mask = fl_mask; + fl_mask <<= 1; /* Initialize the elements themselves, starting from the leaves. */ diff --git a/kernel/rcu/tree.h b/kernel/rcu/tree.h index bf2c1e6696917..0f69a79c5b7dc 100644 --- a/kernel/rcu/tree.h +++ b/kernel/rcu/tree.h @@ -307,6 +307,9 @@ struct rcu_data { /* 4) reasons this CPU needed to be kicked by force_quiescent_state */ unsigned long dynticks_fqs; /* Kicked due to dynticks idle. */ unsigned long offline_fqs; /* Kicked due to being offline. */ + unsigned long cond_resched_completed; + /* Grace period that needs help */ + /* from cond_resched(). */ /* 5) __rcu_pending() statistics. */ unsigned long n_rcu_pending; /* rcu_pending() calls since boot. */ @@ -392,6 +395,7 @@ struct rcu_state { struct rcu_node *level[RCU_NUM_LVLS]; /* Hierarchy levels. */ u32 levelcnt[MAX_RCU_LVLS + 1]; /* # nodes in each level. */ u8 levelspread[RCU_NUM_LVLS]; /* kids/node in each level. */ + u8 flavor_mask; /* bit in flavor mask. */ struct rcu_data __percpu *rda; /* pointer of percu rcu_data. */ void (*call)(struct rcu_head *head, /* call_rcu() flavor. */ void (*func)(struct rcu_head *head)); @@ -563,7 +567,7 @@ static bool rcu_nocb_need_deferred_wakeup(struct rcu_data *rdp); static void do_nocb_deferred_wakeup(struct rcu_data *rdp); static void rcu_boot_init_nocb_percpu_data(struct rcu_data *rdp); static void rcu_spawn_nocb_kthreads(struct rcu_state *rsp); -static void rcu_kick_nohz_cpu(int cpu); +static void __maybe_unused rcu_kick_nohz_cpu(int cpu); static bool init_nocb_callback_list(struct rcu_data *rdp); static void rcu_sysidle_enter(struct rcu_dynticks *rdtp, int irq); static void rcu_sysidle_exit(struct rcu_dynticks *rdtp, int irq); diff --git a/kernel/rcu/tree_plugin.h b/kernel/rcu/tree_plugin.h index cbc2c45265e2a..02ac0fb186b82 100644 --- a/kernel/rcu/tree_plugin.h +++ b/kernel/rcu/tree_plugin.h @@ -2404,7 +2404,7 @@ static bool init_nocb_callback_list(struct rcu_data *rdp) * if an adaptive-ticks CPU is failing to respond to the current grace * period and has not be idle from an RCU perspective, kick it. */ -static void rcu_kick_nohz_cpu(int cpu) +static void __maybe_unused rcu_kick_nohz_cpu(int cpu) { #ifdef CONFIG_NO_HZ_FULL if (tick_nohz_full_cpu(cpu)) diff --git a/kernel/rcu/update.c b/kernel/rcu/update.c index 0fb691e63ce6f..bc78835705302 100644 --- a/kernel/rcu/update.c +++ b/kernel/rcu/update.c @@ -350,21 +350,3 @@ static int __init check_cpu_stall_init(void) early_initcall(check_cpu_stall_init); #endif /* #ifdef CONFIG_RCU_STALL_COMMON */ - -/* - * Hooks for cond_resched() and friends to avoid RCU CPU stall warnings. - */ - -DEFINE_PER_CPU(int, rcu_cond_resched_count); - -/* - * Report a set of RCU quiescent states, for use by cond_resched() - * and friends. Out of line due to being called infrequently. - */ -void rcu_resched(void) -{ - preempt_disable(); - __this_cpu_write(rcu_cond_resched_count, 0); - rcu_note_context_switch(smp_processor_id()); - preempt_enable(); -} diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 3bdf01b494fe2..bc1638b334494 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -4147,7 +4147,6 @@ static void __cond_resched(void) int __sched _cond_resched(void) { - rcu_cond_resched(); if (should_resched()) { __cond_resched(); return 1; @@ -4166,18 +4165,15 @@ EXPORT_SYMBOL(_cond_resched); */ int __cond_resched_lock(spinlock_t *lock) { - bool need_rcu_resched = rcu_should_resched(); int resched = should_resched(); int ret = 0; lockdep_assert_held(lock); - if (spin_needbreak(lock) || resched || need_rcu_resched) { + if (spin_needbreak(lock) || resched) { spin_unlock(lock); if (resched) __cond_resched(); - else if (unlikely(need_rcu_resched)) - rcu_resched(); else cpu_relax(); ret = 1; @@ -4191,7 +4187,6 @@ int __sched __cond_resched_softirq(void) { BUG_ON(!in_softirq()); - rcu_cond_resched(); /* BH disabled OK, just recording QSes. */ if (should_resched()) { local_bh_enable(); __cond_resched(); From 89879413eba4d9f57b69f84a32fd085d54057df3 Mon Sep 17 00:00:00 2001 From: Eliad Peller Date: Mon, 26 May 2014 18:44:35 +0300 Subject: [PATCH 022/268] iwlwifi: mvm: rework sched scan channel configuration The current sched scan channel configuration code configures all the supported channels for scanning. However, this can result in SYSASSERT in some cases, when the configured channel is disabled. Instead, configure only the channels given in the req struct, and set the channel_count field appropriately. While on it, change the code to use channel->hw_value instead of recalculating the channel number. Signed-off-by: Eliad Peller Signed-off-by: Emmanuel Grumbach --- drivers/net/wireless/iwlwifi/mvm/scan.c | 65 ++++++++----------------- 1 file changed, 19 insertions(+), 46 deletions(-) diff --git a/drivers/net/wireless/iwlwifi/mvm/scan.c b/drivers/net/wireless/iwlwifi/mvm/scan.c index 4b6c7d4bd199e..eac2b424f6a06 100644 --- a/drivers/net/wireless/iwlwifi/mvm/scan.c +++ b/drivers/net/wireless/iwlwifi/mvm/scan.c @@ -588,9 +588,7 @@ static void iwl_build_scan_cmd(struct iwl_mvm *mvm, struct iwl_scan_offload_cmd *scan, struct iwl_mvm_scan_params *params) { - scan->channel_count = - mvm->nvm_data->bands[IEEE80211_BAND_2GHZ].n_channels + - mvm->nvm_data->bands[IEEE80211_BAND_5GHZ].n_channels; + scan->channel_count = req->n_channels; scan->quiet_time = cpu_to_le16(IWL_ACTIVE_QUIET_TIME); scan->quiet_plcp_th = cpu_to_le16(IWL_PLCP_QUIET_THRESH); scan->good_CRC_th = IWL_GOOD_CRC_TH_DEFAULT; @@ -669,61 +667,37 @@ static void iwl_build_channel_cfg(struct iwl_mvm *mvm, struct cfg80211_sched_scan_request *req, struct iwl_scan_channel_cfg *channels, enum ieee80211_band band, - int *head, int *tail, + int *head, u32 ssid_bitmap, struct iwl_mvm_scan_params *params) { - struct ieee80211_supported_band *s_band; - int n_channels = req->n_channels; - int i, j, index = 0; - bool partial; + int i, index = 0; - /* - * We have to configure all supported channels, even if we don't want to - * scan on them, but we have to send channels in the order that we want - * to scan. So add requested channels to head of the list and others to - * the end. - */ - s_band = &mvm->nvm_data->bands[band]; - - for (i = 0; i < s_band->n_channels && *head <= *tail; i++) { - partial = false; - for (j = 0; j < n_channels; j++) - if (s_band->channels[i].center_freq == - req->channels[j]->center_freq) { - index = *head; - (*head)++; - /* - * Channels that came with the request will be - * in partial scan . - */ - partial = true; - break; - } - if (!partial) { - index = *tail; - (*tail)--; - } - channels->channel_number[index] = - cpu_to_le16(ieee80211_frequency_to_channel( - s_band->channels[i].center_freq)); + for (i = 0; i < req->n_channels; i++) { + struct ieee80211_channel *chan = req->channels[i]; + + if (chan->band != band) + continue; + + index = *head; + (*head)++; + + channels->channel_number[index] = cpu_to_le16(chan->hw_value); channels->dwell_time[index][0] = params->dwell[band].active; channels->dwell_time[index][1] = params->dwell[band].passive; channels->iter_count[index] = cpu_to_le16(1); channels->iter_interval[index] = 0; - if (!(s_band->channels[i].flags & IEEE80211_CHAN_NO_IR)) + if (!(chan->flags & IEEE80211_CHAN_NO_IR)) channels->type[index] |= cpu_to_le32(IWL_SCAN_OFFLOAD_CHANNEL_ACTIVE); channels->type[index] |= - cpu_to_le32(IWL_SCAN_OFFLOAD_CHANNEL_FULL); - if (partial) - channels->type[index] |= - cpu_to_le32(IWL_SCAN_OFFLOAD_CHANNEL_PARTIAL); + cpu_to_le32(IWL_SCAN_OFFLOAD_CHANNEL_FULL | + IWL_SCAN_OFFLOAD_CHANNEL_PARTIAL); - if (s_band->channels[i].flags & IEEE80211_CHAN_NO_HT40) + if (chan->flags & IEEE80211_CHAN_NO_HT40) channels->type[index] |= cpu_to_le32(IWL_SCAN_OFFLOAD_CHANNEL_NARROW); @@ -740,7 +714,6 @@ int iwl_mvm_config_sched_scan(struct iwl_mvm *mvm, int band_2ghz = mvm->nvm_data->bands[IEEE80211_BAND_2GHZ].n_channels; int band_5ghz = mvm->nvm_data->bands[IEEE80211_BAND_5GHZ].n_channels; int head = 0; - int tail = band_2ghz + band_5ghz - 1; u32 ssid_bitmap; int cmd_len; int ret; @@ -772,7 +745,7 @@ int iwl_mvm_config_sched_scan(struct iwl_mvm *mvm, &scan_cfg->scan_cmd.tx_cmd[0], scan_cfg->data); iwl_build_channel_cfg(mvm, req, &scan_cfg->channel_cfg, - IEEE80211_BAND_2GHZ, &head, &tail, + IEEE80211_BAND_2GHZ, &head, ssid_bitmap, ¶ms); } if (band_5ghz) { @@ -782,7 +755,7 @@ int iwl_mvm_config_sched_scan(struct iwl_mvm *mvm, scan_cfg->data + SCAN_OFFLOAD_PROBE_REQ_SIZE); iwl_build_channel_cfg(mvm, req, &scan_cfg->channel_cfg, - IEEE80211_BAND_5GHZ, &head, &tail, + IEEE80211_BAND_5GHZ, &head, ssid_bitmap, ¶ms); } From 511c66818d87db2a8931e7f7f92c7904bdd84f72 Mon Sep 17 00:00:00 2001 From: Mihai Caraman Date: Wed, 18 Jun 2014 18:45:05 +0300 Subject: [PATCH 023/268] KVM: PPC: Book3E: Unlock mmu_lock when setting caching atttribute The patch 08c9a188d0d0fc0f0c5e17d89a06bb59c493110f kvm: powerpc: use caching attributes as per linux pte do not handle properly the error case, letting mmu_lock locked. The lock will further generate a RCU stall from kvmppc_e500_emul_tlbwe() caller. In case of an error go to out label. Signed-off-by: Mihai Caraman Signed-off-by: Alexander Graf --- arch/powerpc/kvm/e500_mmu_host.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/powerpc/kvm/e500_mmu_host.c b/arch/powerpc/kvm/e500_mmu_host.c index dd2cc03f406f9..86903d3f5a033 100644 --- a/arch/powerpc/kvm/e500_mmu_host.c +++ b/arch/powerpc/kvm/e500_mmu_host.c @@ -473,7 +473,8 @@ static inline int kvmppc_e500_shadow_map(struct kvmppc_vcpu_e500 *vcpu_e500, if (printk_ratelimit()) pr_err("%s: pte not present: gfn %lx, pfn %lx\n", __func__, (long)gfn, pfn); - return -EINVAL; + ret = -EINVAL; + goto out; } kvmppc_e500_ref_setup(ref, gtlbe, pfn, wimg); From 02df00eb0019e7d15a1fcddebe4d020226c1ccda Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Tue, 10 Jun 2014 14:06:25 +0200 Subject: [PATCH 024/268] nl80211: move set_qos_map command into split state The non-split wiphy state shouldn't be increased in size so move the new set_qos_map command into the split if statement. Cc: stable@vger.kernel.org (3.14+) Fixes: fa9ffc745610 ("cfg80211: Add support for QoS mapping") Reviewed-by: Emmanuel Grumbach Signed-off-by: Johannes Berg --- net/wireless/nl80211.c | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index ba4f1723c83ad..6668daf693266 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -1497,18 +1497,17 @@ static int nl80211_send_wiphy(struct cfg80211_registered_device *rdev, } CMD(start_p2p_device, START_P2P_DEVICE); CMD(set_mcast_rate, SET_MCAST_RATE); +#ifdef CONFIG_NL80211_TESTMODE + CMD(testmode_cmd, TESTMODE); +#endif if (state->split) { CMD(crit_proto_start, CRIT_PROTOCOL_START); CMD(crit_proto_stop, CRIT_PROTOCOL_STOP); if (rdev->wiphy.flags & WIPHY_FLAG_HAS_CHANNEL_SWITCH) CMD(channel_switch, CHANNEL_SWITCH); + CMD(set_qos_map, SET_QOS_MAP); } - CMD(set_qos_map, SET_QOS_MAP); - -#ifdef CONFIG_NL80211_TESTMODE - CMD(testmode_cmd, TESTMODE); -#endif - + /* add into the if now */ #undef CMD if (rdev->ops->connect || rdev->ops->auth) { From b3c063ae7279981f7161e63b44f214c62f122b32 Mon Sep 17 00:00:00 2001 From: Oren Givon Date: Sun, 25 May 2014 16:31:58 +0300 Subject: [PATCH 025/268] iwlwifi: update the 7265 series HW IDs Add one more 7265 series HW ID. Edit one existing 7265 series HW ID. CC: [3.13+] Signed-off-by: Oren Givon Signed-off-by: Emmanuel Grumbach --- drivers/net/wireless/iwlwifi/pcie/drv.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/wireless/iwlwifi/pcie/drv.c b/drivers/net/wireless/iwlwifi/pcie/drv.c index 7091a18d5a72f..98950e45c7b01 100644 --- a/drivers/net/wireless/iwlwifi/pcie/drv.c +++ b/drivers/net/wireless/iwlwifi/pcie/drv.c @@ -367,6 +367,7 @@ static DEFINE_PCI_DEVICE_TABLE(iwl_hw_card_ids) = { {IWL_PCI_DEVICE(0x095A, 0x5012, iwl7265_2ac_cfg)}, {IWL_PCI_DEVICE(0x095A, 0x5412, iwl7265_2ac_cfg)}, {IWL_PCI_DEVICE(0x095A, 0x5410, iwl7265_2ac_cfg)}, + {IWL_PCI_DEVICE(0x095A, 0x5510, iwl7265_2ac_cfg)}, {IWL_PCI_DEVICE(0x095A, 0x5400, iwl7265_2ac_cfg)}, {IWL_PCI_DEVICE(0x095A, 0x1010, iwl7265_2ac_cfg)}, {IWL_PCI_DEVICE(0x095A, 0x5000, iwl7265_2n_cfg)}, @@ -380,7 +381,7 @@ static DEFINE_PCI_DEVICE_TABLE(iwl_hw_card_ids) = { {IWL_PCI_DEVICE(0x095A, 0x9110, iwl7265_2ac_cfg)}, {IWL_PCI_DEVICE(0x095A, 0x9112, iwl7265_2ac_cfg)}, {IWL_PCI_DEVICE(0x095A, 0x9210, iwl7265_2ac_cfg)}, - {IWL_PCI_DEVICE(0x095A, 0x9200, iwl7265_2ac_cfg)}, + {IWL_PCI_DEVICE(0x095B, 0x9200, iwl7265_2ac_cfg)}, {IWL_PCI_DEVICE(0x095A, 0x9510, iwl7265_2ac_cfg)}, {IWL_PCI_DEVICE(0x095A, 0x9310, iwl7265_2ac_cfg)}, {IWL_PCI_DEVICE(0x095A, 0x9410, iwl7265_2ac_cfg)}, From a42c9fcc4a88cdd246fab3bcf06c4487afee3d88 Mon Sep 17 00:00:00 2001 From: Arik Nemtsov Date: Thu, 8 May 2014 16:17:31 +0300 Subject: [PATCH 026/268] Revert "iwlwifi: remove IWL_UCODE_TLV_FLAGS_UAPSD_SUPPORT flag" This reverts commit dc9a19296a872644f19a06d8eeb5db222d327b41. 3610 cards don't support UAPSD. Signed-off-by: Arik Nemtsov Signed-off-by: Emmanuel Grumbach --- drivers/net/wireless/iwlwifi/iwl-fw.h | 1 + drivers/net/wireless/iwlwifi/mvm/mac80211.c | 7 +++++++ 2 files changed, 8 insertions(+) diff --git a/drivers/net/wireless/iwlwifi/iwl-fw.h b/drivers/net/wireless/iwlwifi/iwl-fw.h index 0aa7c0085c9fd..b1a33322b9bac 100644 --- a/drivers/net/wireless/iwlwifi/iwl-fw.h +++ b/drivers/net/wireless/iwlwifi/iwl-fw.h @@ -88,6 +88,7 @@ * P2P client interfaces simultaneously if they are in different bindings. * @IWL_UCODE_TLV_FLAGS_P2P_BSS_PS_SCM: support power save on BSS station and * P2P client interfaces simultaneously if they are in same bindings. + * @IWL_UCODE_TLV_FLAGS_UAPSD_SUPPORT: General support for uAPSD * @IWL_UCODE_TLV_FLAGS_P2P_PS_UAPSD: P2P client supports uAPSD power save * @IWL_UCODE_TLV_FLAGS_BCAST_FILTERING: uCode supports broadcast filtering. * @IWL_UCODE_TLV_FLAGS_GO_UAPSD: AP/GO interfaces support uAPSD clients diff --git a/drivers/net/wireless/iwlwifi/mvm/mac80211.c b/drivers/net/wireless/iwlwifi/mvm/mac80211.c index 7215f59801863..1cef708cb74da 100644 --- a/drivers/net/wireless/iwlwifi/mvm/mac80211.c +++ b/drivers/net/wireless/iwlwifi/mvm/mac80211.c @@ -303,6 +303,13 @@ int iwl_mvm_mac_setup_register(struct iwl_mvm *mvm) hw->uapsd_max_sp_len = IWL_UAPSD_MAX_SP; } + if (mvm->fw->ucode_capa.flags & IWL_UCODE_TLV_FLAGS_UAPSD_SUPPORT && + !iwlwifi_mod_params.uapsd_disable) { + hw->flags |= IEEE80211_HW_SUPPORTS_UAPSD; + hw->uapsd_queues = IWL_UAPSD_AC_INFO; + hw->uapsd_max_sp_len = IWL_UAPSD_MAX_SP; + } + hw->sta_data_size = sizeof(struct iwl_mvm_sta); hw->vif_data_size = sizeof(struct iwl_mvm_vif); hw->chanctx_data_size = sizeof(u16); From e48393e8cf99f2b070b0a1c3d79411ccddcba2df Mon Sep 17 00:00:00 2001 From: Ilan Peer Date: Thu, 22 May 2014 11:19:02 +0300 Subject: [PATCH 027/268] iwlwifi: mvm: Fix broadcast filtering Current code did not allow sending the broadcast filtering command for P2P Client interfaces. However, this was not enough, since once broadcast filtering command was issued over the station interface after the P2P Client connected, the command also attached the filters to the P2P Client MAC which is not allowed (FW ASSERT 1063). Fix this skipping P2P Client interfaces when constructing the broadcast filtering command Signed-off-by: Ilan Peer Reviewed-by: ArikX Nemtsov Signed-off-by: Emmanuel Grumbach --- drivers/net/wireless/iwlwifi/mvm/mac80211.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/net/wireless/iwlwifi/mvm/mac80211.c b/drivers/net/wireless/iwlwifi/mvm/mac80211.c index 1cef708cb74da..9bfb90680cdcb 100644 --- a/drivers/net/wireless/iwlwifi/mvm/mac80211.c +++ b/drivers/net/wireless/iwlwifi/mvm/mac80211.c @@ -1166,8 +1166,12 @@ static void iwl_mvm_bcast_filter_iterator(void *_data, u8 *mac, bcast_mac = &cmd->macs[mvmvif->id]; - /* enable filtering only for associated stations */ - if (vif->type != NL80211_IFTYPE_STATION || !vif->bss_conf.assoc) + /* + * enable filtering only for associated stations, but not for P2P + * Clients + */ + if (vif->type != NL80211_IFTYPE_STATION || vif->p2p || + !vif->bss_conf.assoc) return; bcast_mac->default_discard = 1; @@ -1244,10 +1248,6 @@ static int iwl_mvm_configure_bcast_filter(struct iwl_mvm *mvm, if (!(mvm->fw->ucode_capa.flags & IWL_UCODE_TLV_FLAGS_BCAST_FILTERING)) return 0; - /* bcast filtering isn't supported for P2P client */ - if (vif->p2p) - return 0; - if (!iwl_mvm_bcast_filter_build_cmd(mvm, &cmd)) return 0; From 341acbb3aabbcfbf069d7de4ad35f51b58176faf Mon Sep 17 00:00:00 2001 From: "Aneesh Kumar K.V" Date: Mon, 16 Jun 2014 00:17:07 +0530 Subject: [PATCH 028/268] KVM: PPC: BOOK3S: HV: Use base page size when comparing against slb value With guests supporting Multiple page size per segment (MPSS), hpte_page_size returns the actual page size used. Add a new function to return base page size and use that to compare against the the page size calculated from SLB. Without this patch a hpte lookup can fail since we are comparing wrong page size in kvmppc_hv_find_lock_hpte. Signed-off-by: Aneesh Kumar K.V Signed-off-by: Alexander Graf --- arch/powerpc/include/asm/kvm_book3s_64.h | 19 +++++++++++++++++-- arch/powerpc/kvm/book3s_64_mmu_hv.c | 2 +- arch/powerpc/kvm/book3s_hv_rm_mmu.c | 7 ++----- 3 files changed, 20 insertions(+), 8 deletions(-) diff --git a/arch/powerpc/include/asm/kvm_book3s_64.h b/arch/powerpc/include/asm/kvm_book3s_64.h index fddb72b48ce9b..d645428a65a41 100644 --- a/arch/powerpc/include/asm/kvm_book3s_64.h +++ b/arch/powerpc/include/asm/kvm_book3s_64.h @@ -198,8 +198,10 @@ static inline unsigned long compute_tlbie_rb(unsigned long v, unsigned long r, return rb; } -static inline unsigned long hpte_page_size(unsigned long h, unsigned long l) +static inline unsigned long __hpte_page_size(unsigned long h, unsigned long l, + bool is_base_size) { + int size, a_psize; /* Look at the 8 bit LP value */ unsigned int lp = (l >> LP_SHIFT) & ((1 << LP_BITS) - 1); @@ -214,14 +216,27 @@ static inline unsigned long hpte_page_size(unsigned long h, unsigned long l) continue; a_psize = __hpte_actual_psize(lp, size); - if (a_psize != -1) + if (a_psize != -1) { + if (is_base_size) + return 1ul << mmu_psize_defs[size].shift; return 1ul << mmu_psize_defs[a_psize].shift; + } } } return 0; } +static inline unsigned long hpte_page_size(unsigned long h, unsigned long l) +{ + return __hpte_page_size(h, l, 0); +} + +static inline unsigned long hpte_base_page_size(unsigned long h, unsigned long l) +{ + return __hpte_page_size(h, l, 1); +} + static inline unsigned long hpte_rpn(unsigned long ptel, unsigned long psize) { return ((ptel & HPTE_R_RPN) & ~(psize - 1)) >> PAGE_SHIFT; diff --git a/arch/powerpc/kvm/book3s_64_mmu_hv.c b/arch/powerpc/kvm/book3s_64_mmu_hv.c index 80561074078d0..68468d695f12a 100644 --- a/arch/powerpc/kvm/book3s_64_mmu_hv.c +++ b/arch/powerpc/kvm/book3s_64_mmu_hv.c @@ -1562,7 +1562,7 @@ static ssize_t kvm_htab_write(struct file *file, const char __user *buf, goto out; } if (!rma_setup && is_vrma_hpte(v)) { - unsigned long psize = hpte_page_size(v, r); + unsigned long psize = hpte_base_page_size(v, r); unsigned long senc = slb_pgsize_encoding(psize); unsigned long lpcr; diff --git a/arch/powerpc/kvm/book3s_hv_rm_mmu.c b/arch/powerpc/kvm/book3s_hv_rm_mmu.c index 6e6224318c36a..5a24d3c2b6b8c 100644 --- a/arch/powerpc/kvm/book3s_hv_rm_mmu.c +++ b/arch/powerpc/kvm/book3s_hv_rm_mmu.c @@ -814,13 +814,10 @@ long kvmppc_hv_find_lock_hpte(struct kvm *kvm, gva_t eaddr, unsigned long slb_v, r = hpte[i+1]; /* - * Check the HPTE again, including large page size - * Since we don't currently allow any MPSS (mixed - * page-size segment) page sizes, it is sufficient - * to check against the actual page size. + * Check the HPTE again, including base page size */ if ((v & valid) && (v & mask) == val && - hpte_page_size(v, r) == (1ul << pshift)) + hpte_base_page_size(v, r) == (1ul << pshift)) /* Return with the HPTE still locked */ return (hash << 3) + (i >> 1); From d76744a93246eccdca1106037e8ee29debf48277 Mon Sep 17 00:00:00 2001 From: Amitkumar Karwar Date: Fri, 20 Jun 2014 11:45:25 -0700 Subject: [PATCH 029/268] mwifiex: fix Tx timeout issue https://bugzilla.kernel.org/show_bug.cgi?id=70191 https://bugzilla.kernel.org/show_bug.cgi?id=77581 It is observed that sometimes Tx packet is downloaded without adding driver's txpd header. This results in firmware parsing garbage data as packet length. Sometimes firmware is unable to read the packet if length comes out as invalid. This stops further traffic and timeout occurs. The root cause is uninitialized fields in tx_info(skb->cb) of packet used to get garbage values. In this case if MWIFIEX_BUF_FLAG_REQUEUED_PKT flag is mistakenly set, txpd header was skipped. This patch makes sure that tx_info is correctly initialized to fix the problem. Cc: Reported-by: Andrew Wiley Reported-by: Linus Gasser Reported-by: Michael Hirsch Tested-by: Xinming Hu Signed-off-by: Amitkumar Karwar Signed-off-by: Maithili Hinge Signed-off-by: Avinash Patil Signed-off-by: Bing Zhao Signed-off-by: John W. Linville --- drivers/net/wireless/mwifiex/main.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/wireless/mwifiex/main.c b/drivers/net/wireless/mwifiex/main.c index cbabc12fbda39..e91cd0fa5ca81 100644 --- a/drivers/net/wireless/mwifiex/main.c +++ b/drivers/net/wireless/mwifiex/main.c @@ -645,6 +645,7 @@ mwifiex_hard_start_xmit(struct sk_buff *skb, struct net_device *dev) } tx_info = MWIFIEX_SKB_TXCB(skb); + memset(tx_info, 0, sizeof(*tx_info)); tx_info->bss_num = priv->bss_num; tx_info->bss_type = priv->bss_type; tx_info->pkt_len = skb->len; From c6bff5449fa78e1d524bb5b67b8ab82faf835bff Mon Sep 17 00:00:00 2001 From: Arend van Spriel Date: Fri, 20 Jun 2014 22:19:25 +0200 Subject: [PATCH 030/268] brcmfmac: assign chip id and rev in bus interface after brcmf_usb_dlneeded The function brcmf_usb_dlneeded() queries the device to obtain the chip id and revision. So assigning these in bus interface before the call resulted in chip id and revision being zero. This was introduced by: commit 5b8045d484d0ef77d6aa9444023220c5671fa3fe Author: Arend van Spriel Date: Tue May 27 12:56:23 2014 +0200 brcmfmac: use asynchronous firmware request in USB Reviewed-by: Hante Meuleman Reviewed-by: Pieter-Paul Giesberts Signed-off-by: Arend van Spriel Signed-off-by: John W. Linville --- drivers/net/wireless/brcm80211/brcmfmac/usb.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/net/wireless/brcm80211/brcmfmac/usb.c b/drivers/net/wireless/brcm80211/brcmfmac/usb.c index 6db51a666f619..d06fcb05adf25 100644 --- a/drivers/net/wireless/brcm80211/brcmfmac/usb.c +++ b/drivers/net/wireless/brcm80211/brcmfmac/usb.c @@ -1184,8 +1184,6 @@ static int brcmf_usb_probe_cb(struct brcmf_usbdev_info *devinfo) bus->bus_priv.usb = bus_pub; dev_set_drvdata(dev, bus); bus->ops = &brcmf_usb_bus_ops; - bus->chip = bus_pub->devid; - bus->chiprev = bus_pub->chiprev; bus->proto_type = BRCMF_PROTO_BCDC; bus->always_use_fws_queue = true; @@ -1194,6 +1192,9 @@ static int brcmf_usb_probe_cb(struct brcmf_usbdev_info *devinfo) if (ret) goto fail; } + bus->chip = bus_pub->devid; + bus->chiprev = bus_pub->chiprev; + /* request firmware here */ brcmf_fw_get_firmwares(dev, 0, brcmf_usb_get_fwname(devinfo), NULL, brcmf_usb_probe_phase2); From 9715a2e8515217206ebf53040c979fdbeb805a21 Mon Sep 17 00:00:00 2001 From: Alexander Graf Date: Thu, 26 Jun 2014 13:19:40 +0200 Subject: [PATCH 031/268] PPC: Add _GLOBAL_TOC for 32bit Commit ac5a8ee8 started using _GLOBAL_TOC on ppc32 code. Unfortunately it's only defined for 64bit targets though. Define it for ppc32 as well, fixing the build breakage that commit introduced. Signed-off-by: Alexander Graf --- arch/powerpc/include/asm/ppc_asm.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/powerpc/include/asm/ppc_asm.h b/arch/powerpc/include/asm/ppc_asm.h index 9ea266eae33e2..7e4612528546b 100644 --- a/arch/powerpc/include/asm/ppc_asm.h +++ b/arch/powerpc/include/asm/ppc_asm.h @@ -277,6 +277,8 @@ GLUE(.,name): .globl n; \ n: +#define _GLOBAL_TOC(name) _GLOBAL(name) + #define _KPROBE(n) \ .section ".kprobes.text","a"; \ .globl n; \ From 73413ffac3b713231dac466bca216f970042c5e5 Mon Sep 17 00:00:00 2001 From: Yijing Wang Date: Wed, 25 Jun 2014 12:22:56 +0800 Subject: [PATCH 032/268] bnx2x: Fix the MSI flags MSI-X should use PCI_MSIX_FLAGS not PCI_MSI_FLAGS. Signed-off-by: Yijing Wang Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c index 2887034523e06..6a8b1453a1b96 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c @@ -12937,7 +12937,7 @@ static int bnx2x_get_num_non_def_sbs(struct pci_dev *pdev, int cnic_cnt) * without the default SB. * For VFs there is no default SB, then we return (index+1). */ - pci_read_config_word(pdev, pdev->msix_cap + PCI_MSI_FLAGS, &control); + pci_read_config_word(pdev, pdev->msix_cap + PCI_MSIX_FLAGS, &control); index = control & PCI_MSIX_FLAGS_QSIZE; From 3e215c8d1b6b772d107f1811b5ee8eae7a046fb4 Mon Sep 17 00:00:00 2001 From: James M Leddy Date: Wed, 25 Jun 2014 17:38:13 -0400 Subject: [PATCH 033/268] udp: Add MIB counters for rcvbuferrors Add MIB counters for rcvbuferrors in UDP to help diagnose problems. Signed-off-by: James M Leddy Acked-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv4/udp.c | 5 ++++- net/ipv6/udp.c | 6 +++++- 2 files changed, 9 insertions(+), 2 deletions(-) diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index d92f94b7e4025..7d5a8661df769 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -1588,8 +1588,11 @@ int udp_queue_rcv_skb(struct sock *sk, struct sk_buff *skb) goto csum_error; - if (sk_rcvqueues_full(sk, skb, sk->sk_rcvbuf)) + if (sk_rcvqueues_full(sk, skb, sk->sk_rcvbuf)) { + UDP_INC_STATS_BH(sock_net(sk), UDP_MIB_RCVBUFERRORS, + is_udplite); goto drop; + } rc = 0; diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index 95c8347992882..7092ff78fd849 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -674,8 +674,11 @@ int udpv6_queue_rcv_skb(struct sock *sk, struct sk_buff *skb) goto csum_error; } - if (sk_rcvqueues_full(sk, skb, sk->sk_rcvbuf)) + if (sk_rcvqueues_full(sk, skb, sk->sk_rcvbuf)) { + UDP6_INC_STATS_BH(sock_net(sk), + UDP_MIB_RCVBUFERRORS, is_udplite); goto drop; + } skb_dst_drop(skb); @@ -690,6 +693,7 @@ int udpv6_queue_rcv_skb(struct sock *sk, struct sk_buff *skb) bh_unlock_sock(sk); return rc; + csum_error: UDP6_INC_STATS_BH(sock_net(sk), UDP_MIB_CSUMERRORS, is_udplite); drop: From e940f5d6ba6a01f8dbb870854d5205d322452730 Mon Sep 17 00:00:00 2001 From: Hangbin Liu Date: Fri, 27 Jun 2014 09:57:53 +0800 Subject: [PATCH 034/268] ipv6: Fix MLD Query message check Based on RFC3810 6.2, we also need to check the hop limit and router alert option besides source address. Signed-off-by: Hangbin Liu Acked-by: YOSHIFUJI Hideaki Acked-by: Hannes Frederic Sowa Signed-off-by: David S. Miller --- net/ipv6/mcast.c | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c index 08b367c6b9cfe..617f0958e164e 100644 --- a/net/ipv6/mcast.c +++ b/net/ipv6/mcast.c @@ -1301,8 +1301,17 @@ int igmp6_event_query(struct sk_buff *skb) len = ntohs(ipv6_hdr(skb)->payload_len) + sizeof(struct ipv6hdr); len -= skb_network_header_len(skb); - /* Drop queries with not link local source */ - if (!(ipv6_addr_type(&ipv6_hdr(skb)->saddr) & IPV6_ADDR_LINKLOCAL)) + /* RFC3810 6.2 + * Upon reception of an MLD message that contains a Query, the node + * checks if the source address of the message is a valid link-local + * address, if the Hop Limit is set to 1, and if the Router Alert + * option is present in the Hop-By-Hop Options header of the IPv6 + * packet. If any of these checks fails, the packet is dropped. + */ + if (!(ipv6_addr_type(&ipv6_hdr(skb)->saddr) & IPV6_ADDR_LINKLOCAL) || + ipv6_hdr(skb)->hop_limit != 1 || + !(IP6CB(skb)->flags & IP6SKB_ROUTERALERT) || + IP6CB(skb)->ra != htons(IPV6_OPT_ROUTERALERT_MLD)) return -EINVAL; idev = __in6_dev_get(skb->dev); From 3fc60aa097b8eb0f701c5bf755bc8f7d3ffeb0bd Mon Sep 17 00:00:00 2001 From: Denis Kirjanov Date: Wed, 25 Jun 2014 21:34:56 +0400 Subject: [PATCH 035/268] powerpc: bpf: Use correct mask while accessing the VLAN tag To get a full tag (and not just a VID) we should access the TCI except the VLAN_TAG_PRESENT field (which means that 802.1q header is present). Also ensure that the VLAN_TAG_PRESENT stay on its place Signed-off-by: Denis Kirjanov Signed-off-by: David S. Miller --- arch/powerpc/net/bpf_jit_comp.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/arch/powerpc/net/bpf_jit_comp.c b/arch/powerpc/net/bpf_jit_comp.c index 6dcdadefd8d05..892167b0a4bc9 100644 --- a/arch/powerpc/net/bpf_jit_comp.c +++ b/arch/powerpc/net/bpf_jit_comp.c @@ -390,10 +390,12 @@ static int bpf_jit_build_body(struct sk_filter *fp, u32 *image, case BPF_ANC | SKF_AD_VLAN_TAG: case BPF_ANC | SKF_AD_VLAN_TAG_PRESENT: BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, vlan_tci) != 2); + BUILD_BUG_ON(VLAN_TAG_PRESENT != 0x1000); + PPC_LHZ_OFFS(r_A, r_skb, offsetof(struct sk_buff, vlan_tci)); if (code == (BPF_ANC | SKF_AD_VLAN_TAG)) - PPC_ANDI(r_A, r_A, VLAN_VID_MASK); + PPC_ANDI(r_A, r_A, ~VLAN_TAG_PRESENT); else PPC_ANDI(r_A, r_A, VLAN_TAG_PRESENT); break; From dba63115ce0c888fcb4cdec3f8a4ba97d144afaf Mon Sep 17 00:00:00 2001 From: Denis Kirjanov Date: Wed, 25 Jun 2014 21:34:57 +0400 Subject: [PATCH 036/268] powerpc: bpf: Fix the broken LD_VLAN_TAG_PRESENT test We have to return the boolean here if the tag presents or not, not just ANDing the TCI with the mask which results to: [ 709.412097] test_bpf: #18 LD_VLAN_TAG_PRESENT [ 709.412245] ret 4096 != 1 [ 709.412332] ret 4096 != 1 [ 709.412333] FAIL (2 times) Signed-off-by: Denis Kirjanov Signed-off-by: David S. Miller --- arch/powerpc/net/bpf_jit_comp.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/arch/powerpc/net/bpf_jit_comp.c b/arch/powerpc/net/bpf_jit_comp.c index 892167b0a4bc9..82e82cadcde56 100644 --- a/arch/powerpc/net/bpf_jit_comp.c +++ b/arch/powerpc/net/bpf_jit_comp.c @@ -394,10 +394,12 @@ static int bpf_jit_build_body(struct sk_filter *fp, u32 *image, PPC_LHZ_OFFS(r_A, r_skb, offsetof(struct sk_buff, vlan_tci)); - if (code == (BPF_ANC | SKF_AD_VLAN_TAG)) + if (code == (BPF_ANC | SKF_AD_VLAN_TAG)) { PPC_ANDI(r_A, r_A, ~VLAN_TAG_PRESENT); - else + } else { PPC_ANDI(r_A, r_A, VLAN_TAG_PRESENT); + PPC_SRWI(r_A, r_A, 12); + } break; case BPF_ANC | SKF_AD_QUEUE: BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, From fe984c08e20f0fc2b4666bf8eeeb02605568387b Mon Sep 17 00:00:00 2001 From: Andy Zhou Date: Tue, 6 May 2014 17:23:48 -0700 Subject: [PATCH 037/268] openvswitch: Fix a double free bug for the sample action When sample action returns with an error, the skb has already been freed. This patch fix a bug to make sure we don't free it again. This bug introduced by commit ccb1352e76cff05 (net: Add Open vSwitch kernel components.) Signed-off-by: Andy Zhou Signed-off-by: Pravin B Shelar --- net/openvswitch/actions.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/openvswitch/actions.c b/net/openvswitch/actions.c index c36856a457ca9..e70d8b18e9629 100644 --- a/net/openvswitch/actions.c +++ b/net/openvswitch/actions.c @@ -551,6 +551,8 @@ static int do_execute_actions(struct datapath *dp, struct sk_buff *skb, case OVS_ACTION_ATTR_SAMPLE: err = sample(dp, skb, a); + if (unlikely(err)) /* skb already freed. */ + return err; break; } From e0bb8c44ed5cfcc56b571758ed966ee48779024c Mon Sep 17 00:00:00 2001 From: Wei Zhang Date: Sat, 28 Jun 2014 12:34:53 -0700 Subject: [PATCH 038/268] openvswitch: supply a dummy err_handler of gre_cisco_protocol to prevent kernel crash When use gre vport, openvswitch register a gre_cisco_protocol but does not supply a err_handler with it. The gre_cisco_err() in net/ipv4/gre_demux.c expect err_handler be provided with the gre_cisco_protocol implementation, and call ->err_handler() without existence check, cause the kernel crash. This patch provide a err_handler to fix this bug. This bug introduced by commit aa310701e787087d (openvswitch: Add gre tunnel support.) Signed-off-by: Wei Zhang Signed-off-by: Jesse Gross Signed-off-by: Pravin B Shelar --- net/openvswitch/vport-gre.c | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/net/openvswitch/vport-gre.c b/net/openvswitch/vport-gre.c index 35ec4fed09e22..f49148a07da29 100644 --- a/net/openvswitch/vport-gre.c +++ b/net/openvswitch/vport-gre.c @@ -110,6 +110,22 @@ static int gre_rcv(struct sk_buff *skb, return PACKET_RCVD; } +/* Called with rcu_read_lock and BH disabled. */ +static int gre_err(struct sk_buff *skb, u32 info, + const struct tnl_ptk_info *tpi) +{ + struct ovs_net *ovs_net; + struct vport *vport; + + ovs_net = net_generic(dev_net(skb->dev), ovs_net_id); + vport = rcu_dereference(ovs_net->vport_net.gre_vport); + + if (unlikely(!vport)) + return PACKET_REJECT; + else + return PACKET_RCVD; +} + static int gre_tnl_send(struct vport *vport, struct sk_buff *skb) { struct net *net = ovs_dp_get_net(vport->dp); @@ -186,6 +202,7 @@ static int gre_tnl_send(struct vport *vport, struct sk_buff *skb) static struct gre_cisco_protocol gre_protocol = { .handler = gre_rcv, + .err_handler = gre_err, .priority = 1, }; From ad55200734c65a3ec5d0c39d6ea904008baea536 Mon Sep 17 00:00:00 2001 From: Ben Pfaff Date: Tue, 6 May 2014 16:48:38 -0700 Subject: [PATCH 039/268] openvswitch: Fix tracking of flags seen in TCP flows. Flow statistics need to take into account the TCP flags from the packet currently being processed (in 'key'), not the TCP flags matched by the flow found in the kernel flow table (in 'flow'). This bug made the Open vSwitch userspace fin_timeout action have no effect in many cases. This bug is introduced by commit 88d73f6c411ac2f0578 (openvswitch: Use TCP flags in the flow key for stats.) Reported-by: Len Gao Signed-off-by: Ben Pfaff Acked-by: Jarno Rajahalme Acked-by: Jesse Gross Signed-off-by: Pravin B Shelar --- net/openvswitch/datapath.c | 4 ++-- net/openvswitch/flow.c | 4 ++-- net/openvswitch/flow.h | 5 +++-- 3 files changed, 7 insertions(+), 6 deletions(-) diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c index 0d407bca81e35..a863678c50acc 100644 --- a/net/openvswitch/datapath.c +++ b/net/openvswitch/datapath.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2007-2013 Nicira, Inc. + * Copyright (c) 2007-2014 Nicira, Inc. * * This program is free software; you can redistribute it and/or * modify it under the terms of version 2 of the GNU General Public @@ -276,7 +276,7 @@ void ovs_dp_process_received_packet(struct vport *p, struct sk_buff *skb) OVS_CB(skb)->flow = flow; OVS_CB(skb)->pkt_key = &key; - ovs_flow_stats_update(OVS_CB(skb)->flow, skb); + ovs_flow_stats_update(OVS_CB(skb)->flow, key.tp.flags, skb); ovs_execute_actions(dp, skb); stats_counter = &stats->n_hit; diff --git a/net/openvswitch/flow.c b/net/openvswitch/flow.c index 334751cb15289..d07ab538fc9d3 100644 --- a/net/openvswitch/flow.c +++ b/net/openvswitch/flow.c @@ -61,10 +61,10 @@ u64 ovs_flow_used_time(unsigned long flow_jiffies) #define TCP_FLAGS_BE16(tp) (*(__be16 *)&tcp_flag_word(tp) & htons(0x0FFF)) -void ovs_flow_stats_update(struct sw_flow *flow, struct sk_buff *skb) +void ovs_flow_stats_update(struct sw_flow *flow, __be16 tcp_flags, + struct sk_buff *skb) { struct flow_stats *stats; - __be16 tcp_flags = flow->key.tp.flags; int node = numa_node_id(); stats = rcu_dereference(flow->stats[node]); diff --git a/net/openvswitch/flow.h b/net/openvswitch/flow.h index ac395d2cd8216..5e5aaed3a85b0 100644 --- a/net/openvswitch/flow.h +++ b/net/openvswitch/flow.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2007-2013 Nicira, Inc. + * Copyright (c) 2007-2014 Nicira, Inc. * * This program is free software; you can redistribute it and/or * modify it under the terms of version 2 of the GNU General Public @@ -180,7 +180,8 @@ struct arp_eth_header { unsigned char ar_tip[4]; /* target IP address */ } __packed; -void ovs_flow_stats_update(struct sw_flow *, struct sk_buff *); +void ovs_flow_stats_update(struct sw_flow *, __be16 tcp_flags, + struct sk_buff *); void ovs_flow_stats_get(const struct sw_flow *, struct ovs_flow_stats *, unsigned long *used, __be16 *tcp_flags); void ovs_flow_stats_clear(struct sw_flow *); From 4a46b24e147dfa9b858026da02cad0bdd4e149d2 Mon Sep 17 00:00:00 2001 From: Alex Wang Date: Mon, 30 Jun 2014 20:30:29 -0700 Subject: [PATCH 040/268] openvswitch: Use exact lookup for flow_get and flow_del. Due to the race condition in userspace, there is chance that two overlapping megaflows could be installed in datapath. And this causes userspace unable to delete the less inclusive megaflow flow even after it timeout, since the flow_del logic will stop at the first match of masked flow. This commit fixes the bug by making the kernel flow_del and flow_get logic check all masks in that case. Introduced by 03f0d916a (openvswitch: Mega flow implementation). Signed-off-by: Alex Wang Acked-by: Andy Zhou Signed-off-by: Pravin B Shelar --- net/openvswitch/datapath.c | 23 +++++++++++------------ net/openvswitch/flow_table.c | 16 ++++++++++++++++ net/openvswitch/flow_table.h | 3 ++- 3 files changed, 29 insertions(+), 13 deletions(-) diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c index a863678c50acc..9db4bf6740d1e 100644 --- a/net/openvswitch/datapath.c +++ b/net/openvswitch/datapath.c @@ -889,8 +889,11 @@ static int ovs_flow_cmd_new(struct sk_buff *skb, struct genl_info *info) } /* The unmasked key has to be the same for flow updates. */ if (unlikely(!ovs_flow_cmp_unmasked_key(flow, &match))) { - error = -EEXIST; - goto err_unlock_ovs; + flow = ovs_flow_tbl_lookup_exact(&dp->table, &match); + if (!flow) { + error = -ENOENT; + goto err_unlock_ovs; + } } /* Update actions. */ old_acts = ovsl_dereference(flow->sf_acts); @@ -981,16 +984,12 @@ static int ovs_flow_cmd_set(struct sk_buff *skb, struct genl_info *info) goto err_unlock_ovs; } /* Check that the flow exists. */ - flow = ovs_flow_tbl_lookup(&dp->table, &key); + flow = ovs_flow_tbl_lookup_exact(&dp->table, &match); if (unlikely(!flow)) { error = -ENOENT; goto err_unlock_ovs; } - /* The unmasked key has to be the same for flow updates. */ - if (unlikely(!ovs_flow_cmp_unmasked_key(flow, &match))) { - error = -EEXIST; - goto err_unlock_ovs; - } + /* Update actions, if present. */ if (likely(acts)) { old_acts = ovsl_dereference(flow->sf_acts); @@ -1063,8 +1062,8 @@ static int ovs_flow_cmd_get(struct sk_buff *skb, struct genl_info *info) goto unlock; } - flow = ovs_flow_tbl_lookup(&dp->table, &key); - if (!flow || !ovs_flow_cmp_unmasked_key(flow, &match)) { + flow = ovs_flow_tbl_lookup_exact(&dp->table, &match); + if (!flow) { err = -ENOENT; goto unlock; } @@ -1113,8 +1112,8 @@ static int ovs_flow_cmd_del(struct sk_buff *skb, struct genl_info *info) goto unlock; } - flow = ovs_flow_tbl_lookup(&dp->table, &key); - if (unlikely(!flow || !ovs_flow_cmp_unmasked_key(flow, &match))) { + flow = ovs_flow_tbl_lookup_exact(&dp->table, &match); + if (unlikely(!flow)) { err = -ENOENT; goto unlock; } diff --git a/net/openvswitch/flow_table.c b/net/openvswitch/flow_table.c index 574c3abc9b307..cf2d853646f05 100644 --- a/net/openvswitch/flow_table.c +++ b/net/openvswitch/flow_table.c @@ -456,6 +456,22 @@ struct sw_flow *ovs_flow_tbl_lookup(struct flow_table *tbl, return ovs_flow_tbl_lookup_stats(tbl, key, &n_mask_hit); } +struct sw_flow *ovs_flow_tbl_lookup_exact(struct flow_table *tbl, + struct sw_flow_match *match) +{ + struct table_instance *ti = rcu_dereference_ovsl(tbl->ti); + struct sw_flow_mask *mask; + struct sw_flow *flow; + + /* Always called under ovs-mutex. */ + list_for_each_entry(mask, &tbl->mask_list, list) { + flow = masked_flow_lookup(ti, match->key, mask); + if (flow && ovs_flow_cmp_unmasked_key(flow, match)) /* Found */ + return flow; + } + return NULL; +} + int ovs_flow_tbl_num_masks(const struct flow_table *table) { struct sw_flow_mask *mask; diff --git a/net/openvswitch/flow_table.h b/net/openvswitch/flow_table.h index ca8a5820f6153..5918bff7f3f6c 100644 --- a/net/openvswitch/flow_table.h +++ b/net/openvswitch/flow_table.h @@ -76,7 +76,8 @@ struct sw_flow *ovs_flow_tbl_lookup_stats(struct flow_table *, u32 *n_mask_hit); struct sw_flow *ovs_flow_tbl_lookup(struct flow_table *, const struct sw_flow_key *); - +struct sw_flow *ovs_flow_tbl_lookup_exact(struct flow_table *tbl, + struct sw_flow_match *match); bool ovs_flow_cmp_unmasked_key(const struct sw_flow *flow, struct sw_flow_match *match); From e9110361a9a4e258b072b14bd44eb78cf11453cb Mon Sep 17 00:00:00 2001 From: Heiko Schocher Date: Tue, 24 Jun 2014 09:25:18 +0200 Subject: [PATCH 041/268] UBI: fix the volumes tree sorting criteria Commig "604b592 UBI: fix rb_tree node comparison in add_map" broke fastmap backward compatibility and older fastmap images cannot be mounted anymore. The reason is that it changes the volumes RB-tree sorting criteria. This patch fixes the problem. Artem: re-write the commit message Signed-off-by: Heiko Schocher Acked-by: Richard Weinberger Signed-off-by: Artem Bityutskiy --- drivers/mtd/ubi/fastmap.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/mtd/ubi/fastmap.c b/drivers/mtd/ubi/fastmap.c index b04e7d059888d..72f39daee649e 100644 --- a/drivers/mtd/ubi/fastmap.c +++ b/drivers/mtd/ubi/fastmap.c @@ -125,7 +125,7 @@ static struct ubi_ainf_volume *add_vol(struct ubi_attach_info *ai, int vol_id, parent = *p; av = rb_entry(parent, struct ubi_ainf_volume, rb); - if (vol_id < av->vol_id) + if (vol_id > av->vol_id) p = &(*p)->rb_left; else p = &(*p)->rb_right; From 7f502361531e9eecb396cf99bdc9e9a59f7ebd7f Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 30 Jun 2014 01:26:23 -0700 Subject: [PATCH 042/268] ipv4: irq safe sk_dst_[re]set() and ipv4_sk_update_pmtu() fix We have two different ways to handle changes to sk->sk_dst First way (used by TCP) assumes socket lock is owned by caller, and use no extra lock : __sk_dst_set() & __sk_dst_reset() Another way (used by UDP) uses sk_dst_lock because socket lock is not always taken. Note that sk_dst_lock is not softirq safe. These ways are not inter changeable for a given socket type. ipv4_sk_update_pmtu(), added in linux-3.8, added a race, as it used the socket lock as synchronization, but users might be UDP sockets. Instead of converting sk_dst_lock to a softirq safe version, use xchg() as we did for sk_rx_dst in commit e47eb5dfb296b ("udp: ipv4: do not use sk_dst_lock from softirq context") In a follow up patch, we probably can remove sk_dst_lock, as it is only used in IPv6. Signed-off-by: Eric Dumazet Cc: Steffen Klassert Fixes: 9cb3a50c5f63e ("ipv4: Invalidate the socket cached route on pmtu events if possible") Signed-off-by: David S. Miller --- include/net/sock.h | 12 ++++++------ net/ipv4/route.c | 15 ++++++++------- 2 files changed, 14 insertions(+), 13 deletions(-) diff --git a/include/net/sock.h b/include/net/sock.h index 173cae485de19..c556fd9b05acf 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -1768,9 +1768,11 @@ __sk_dst_set(struct sock *sk, struct dst_entry *dst) static inline void sk_dst_set(struct sock *sk, struct dst_entry *dst) { - spin_lock(&sk->sk_dst_lock); - __sk_dst_set(sk, dst); - spin_unlock(&sk->sk_dst_lock); + struct dst_entry *old_dst; + + sk_tx_queue_clear(sk); + old_dst = xchg(&sk->sk_dst_cache, dst); + dst_release(old_dst); } static inline void @@ -1782,9 +1784,7 @@ __sk_dst_reset(struct sock *sk) static inline void sk_dst_reset(struct sock *sk) { - spin_lock(&sk->sk_dst_lock); - __sk_dst_reset(sk); - spin_unlock(&sk->sk_dst_lock); + sk_dst_set(sk, NULL); } struct dst_entry *__sk_dst_check(struct sock *sk, u32 cookie); diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 082239ffe34a1..3162ea923dedb 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -1010,7 +1010,7 @@ void ipv4_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, u32 mtu) const struct iphdr *iph = (const struct iphdr *) skb->data; struct flowi4 fl4; struct rtable *rt; - struct dst_entry *dst; + struct dst_entry *odst = NULL; bool new = false; bh_lock_sock(sk); @@ -1018,16 +1018,17 @@ void ipv4_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, u32 mtu) if (!ip_sk_accept_pmtu(sk)) goto out; - rt = (struct rtable *) __sk_dst_get(sk); + odst = sk_dst_get(sk); - if (sock_owned_by_user(sk) || !rt) { + if (sock_owned_by_user(sk) || !odst) { __ipv4_sk_update_pmtu(skb, sk, mtu); goto out; } __build_flow_key(&fl4, sk, iph, 0, 0, 0, 0, 0); - if (!__sk_dst_check(sk, 0)) { + rt = (struct rtable *)odst; + if (odst->obsolete && odst->ops->check(odst, 0) == NULL) { rt = ip_route_output_flow(sock_net(sk), &fl4, sk); if (IS_ERR(rt)) goto out; @@ -1037,8 +1038,7 @@ void ipv4_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, u32 mtu) __ip_rt_update_pmtu((struct rtable *) rt->dst.path, &fl4, mtu); - dst = dst_check(&rt->dst, 0); - if (!dst) { + if (!dst_check(&rt->dst, 0)) { if (new) dst_release(&rt->dst); @@ -1050,10 +1050,11 @@ void ipv4_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, u32 mtu) } if (new) - __sk_dst_set(sk, &rt->dst); + sk_dst_set(sk, &rt->dst); out: bh_unlock_sock(sk); + dst_release(odst); } EXPORT_SYMBOL_GPL(ipv4_sk_update_pmtu); From 0e2be4c1121ae3dc2771c4d9b99d4c39ea9577d8 Mon Sep 17 00:00:00 2001 From: Thomas Petazzoni Date: Tue, 1 Jul 2014 17:23:06 +0200 Subject: [PATCH 043/268] ARM: mvebu: fix SMP boot for Armada 38x and Armada 375 Z1 in big endian The SMP boot on Armada 38x and Armada 375 Z1 is currently broken in big-endian configurations, and this commit fixes it for both platforms. For Armada 375 Z1, the problem was in the armada_375_smp_cpu1_enable_code part of the code that gets copied to the Crypto SRAM as a work-around for an issue of the Z1 stepping. This piece of code was not switching the CPU core to big-endian, and not endian-swapping the value read from the Resume Address register (the value is stored little-endian). Due to the introduction of the conditional 'rev r1, r1' instruction, the offset between the 'ldr r0, [pc, #4]' instruction and the value it was looking is different between LE and BE configurations. To solve this, we instead use one 'adr' instruction followed by one 'ldr'. For Armada 38x, the problem was simply that the CPU core was not switched to big endian in the secondary CPU startup function. This change was tested in LE and BE configurations on Armada 385, Armada 375 Z1 and Armada 375 A0. Signed-off-by: Thomas Petazzoni Link: https://lkml.kernel.org/r/1404228186-21203-1-git-send-email-thomas.petazzoni@free-electrons.com Signed-off-by: Jason Cooper --- arch/arm/mach-mvebu/headsmp-a9.S | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/arch/arm/mach-mvebu/headsmp-a9.S b/arch/arm/mach-mvebu/headsmp-a9.S index 5925366bc03cc..da5bb292b91cf 100644 --- a/arch/arm/mach-mvebu/headsmp-a9.S +++ b/arch/arm/mach-mvebu/headsmp-a9.S @@ -15,6 +15,8 @@ #include #include +#include + __CPUINIT #define CPU_RESUME_ADDR_REG 0xf10182d4 @@ -22,13 +24,18 @@ .global armada_375_smp_cpu1_enable_code_end armada_375_smp_cpu1_enable_code_start: - ldr r0, [pc, #4] +ARM_BE8(setend be) + adr r0, 1f + ldr r0, [r0] ldr r1, [r0] +ARM_BE8(rev r1, r1) mov pc, r1 +1: .word CPU_RESUME_ADDR_REG armada_375_smp_cpu1_enable_code_end: ENTRY(mvebu_cortex_a9_secondary_startup) +ARM_BE8(setend be) bl v7_invalidate_l1 b secondary_startup ENDPROC(mvebu_cortex_a9_secondary_startup) From 07b0f00964def8af9321cfd6c4a7e84f6362f728 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 26 Jun 2014 00:44:02 -0700 Subject: [PATCH 044/268] bnx2x: fix possible panic under memory stress While it is legal to kfree(NULL), it is not wise to use : put_page(virt_to_head_page(NULL)) BUG: unable to handle kernel paging request at ffffeba400000000 IP: [] virt_to_head_page+0x36/0x44 [bnx2x] Reported-by: Michel Lespinasse Signed-off-by: Eric Dumazet Cc: Ariel Elior Fixes: d46d132cc021 ("bnx2x: use netdev_alloc_frag()") Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c index 47c5814114e17..4b875da1c7ed2 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c @@ -797,7 +797,8 @@ static void bnx2x_tpa_stop(struct bnx2x *bp, struct bnx2x_fastpath *fp, return; } - bnx2x_frag_free(fp, new_data); + if (new_data) + bnx2x_frag_free(fp, new_data); drop: /* drop the packet and keep the buffer in the bin */ DP(NETIF_MSG_RX_STATUS, From 3b140a678834f55602e50e7d6a47663e230434a7 Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Thu, 26 Jun 2014 10:06:44 -0700 Subject: [PATCH 045/268] net: systemport: do not clear IFF_MULTICAST flag The SYSTEMPORT Ethernet MAC supports multicast just fine, it just lacks any sort of Unicast/Broadcast/Multicasting filtering at the Ethernet MAC level since that is handled by the front end Ethernet switch, but that is properly handled by bcm_sysport_set_rx_mode(). Some user-space applications might be relying on the presence of this flag to prevent using multicast sockets, this also prevents that interface from joining the IPv6 all-router mcast group. Signed-off-by: Florian Fainelli Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/bcmsysport.c | 6 ------ 1 file changed, 6 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bcmsysport.c b/drivers/net/ethernet/broadcom/bcmsysport.c index 141160ef249ae..f6bccd847ee6b 100644 --- a/drivers/net/ethernet/broadcom/bcmsysport.c +++ b/drivers/net/ethernet/broadcom/bcmsysport.c @@ -1589,12 +1589,6 @@ static int bcm_sysport_probe(struct platform_device *pdev) BUILD_BUG_ON(sizeof(struct bcm_tsb) != 8); dev->needed_headroom += sizeof(struct bcm_tsb); - /* We are interfaced to a switch which handles the multicast - * filtering for us, so we do not support programming any - * multicast hash table in this Ethernet MAC. - */ - dev->flags &= ~IFF_MULTICAST; - /* libphy will adjust the link state accordingly */ netif_carrier_off(dev); From 412bce83ac786197d0b2805c5bcded4d95cdeeb0 Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Thu, 26 Jun 2014 10:06:45 -0700 Subject: [PATCH 046/268] net: systemport: fix UniMAC reset logic The UniMAC CMD_SW_RESET bit is not a self-clearing bit, so we need to assert it, wait a bit and clear it manually. As a result, umac_reset() is updated not to return any value. The previous version of the code simply wrote 0 to the CMD register, which would make the busy-waiting loop exit immediately, having zero effect. By writing 0 to the CMD register, we were clearing all bits in the CMD register, and not using the hardware reset default values which are set on purpose. Signed-off-by: Florian Fainelli Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/bcmsysport.c | 33 ++++++---------------- 1 file changed, 9 insertions(+), 24 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bcmsysport.c b/drivers/net/ethernet/broadcom/bcmsysport.c index f6bccd847ee6b..d31f7d239064c 100644 --- a/drivers/net/ethernet/broadcom/bcmsysport.c +++ b/drivers/net/ethernet/broadcom/bcmsysport.c @@ -1254,28 +1254,17 @@ static inline void umac_enable_set(struct bcm_sysport_priv *priv, usleep_range(1000, 2000); } -static inline int umac_reset(struct bcm_sysport_priv *priv) +static inline void umac_reset(struct bcm_sysport_priv *priv) { - unsigned int timeout = 0; u32 reg; - int ret = 0; - - umac_writel(priv, 0, UMAC_CMD); - while (timeout++ < 1000) { - reg = umac_readl(priv, UMAC_CMD); - if (!(reg & CMD_SW_RESET)) - break; - - udelay(1); - } - - if (timeout == 1000) { - dev_err(&priv->pdev->dev, - "timeout waiting for MAC to come out of reset\n"); - ret = -ETIMEDOUT; - } - return ret; + reg = umac_readl(priv, UMAC_CMD); + reg |= CMD_SW_RESET; + umac_writel(priv, reg, UMAC_CMD); + udelay(10); + reg = umac_readl(priv, UMAC_CMD); + reg &= ~CMD_SW_RESET; + umac_writel(priv, reg, UMAC_CMD); } static void umac_set_hw_addr(struct bcm_sysport_priv *priv, @@ -1303,11 +1292,7 @@ static int bcm_sysport_open(struct net_device *dev) int ret; /* Reset UniMAC */ - ret = umac_reset(priv); - if (ret) { - netdev_err(dev, "UniMAC reset failed\n"); - return ret; - } + umac_reset(priv); /* Flush TX and RX FIFOs at TOPCTRL level */ topctrl_flush(priv); From 16f62d9bed0cdee5f9341b64cc7144869282122d Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Thu, 26 Jun 2014 10:06:46 -0700 Subject: [PATCH 047/268] net: systemport: fix TX NAPI work done return value Although we do not limit the number of packets the TX completion function bcm_sysport_tx_reclaim() is allowed to reclaim, we were still using its return value as-is. This means that we could hit the WARN() in net/core/dev.c where work_done >= budget. Make sure we do exit the NAPI context when the TX ring is empty, and pretend there was no work to do. Signed-off-by: Florian Fainelli Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/bcmsysport.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bcmsysport.c b/drivers/net/ethernet/broadcom/bcmsysport.c index d31f7d239064c..5776e503e4c57 100644 --- a/drivers/net/ethernet/broadcom/bcmsysport.c +++ b/drivers/net/ethernet/broadcom/bcmsysport.c @@ -654,13 +654,13 @@ static int bcm_sysport_tx_poll(struct napi_struct *napi, int budget) work_done = bcm_sysport_tx_reclaim(ring->priv, ring); - if (work_done < budget) { + if (work_done == 0) { napi_complete(napi); /* re-enable TX interrupt */ intrl2_1_mask_clear(ring->priv, BIT(ring->index)); } - return work_done; + return 0; } static void bcm_sysport_tx_reclaim_all(struct bcm_sysport_priv *priv) From 0f50ce96b7c0488cffe64255694a2451b4347d09 Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Thu, 26 Jun 2014 10:26:20 -0700 Subject: [PATCH 048/268] net: bcmgenet: disable clock before register_netdev As soon as register_netdev() is called, the network device notifiers are running which means that other parts of the kernel, or user-space programs can call the network device ndo_open() callback and use the interface. Disable the Ethernet device clock before we register the network device such that we do not create the following situation: CPU0 CPU1 register_netdev() bcmgenet_open() clk_prepare_enable() clk_disable_unprepare() and leave the hardware block gated off, while we think it should be gated on. Signed-off-by: Florian Fainelli Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/genet/bcmgenet.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/broadcom/genet/bcmgenet.c b/drivers/net/ethernet/broadcom/genet/bcmgenet.c index 5ba1cfbd60da3..d17953cc5f49c 100644 --- a/drivers/net/ethernet/broadcom/genet/bcmgenet.c +++ b/drivers/net/ethernet/broadcom/genet/bcmgenet.c @@ -2535,14 +2535,14 @@ static int bcmgenet_probe(struct platform_device *pdev) netif_set_real_num_tx_queues(priv->dev, priv->hw_params->tx_queues + 1); netif_set_real_num_rx_queues(priv->dev, priv->hw_params->rx_queues + 1); - err = register_netdev(dev); - if (err) - goto err_clk_disable; - /* Turn off the main clock, WOL clock is handled separately */ if (!IS_ERR(priv->clk)) clk_disable_unprepare(priv->clk); + err = register_netdev(dev); + if (err) + goto err; + return err; err_clk_disable: From 219575eb6311ad090e26c675a6e217f5d48780a3 Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Thu, 26 Jun 2014 10:26:21 -0700 Subject: [PATCH 049/268] net: bcmgenet: start with carrier off We use the PHY library which will determine the link state for us, make sure we start with a carrier off until libphy has completed the link training. Signed-off-by: Florian Fainelli Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/genet/bcmgenet.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/net/ethernet/broadcom/genet/bcmgenet.c b/drivers/net/ethernet/broadcom/genet/bcmgenet.c index d17953cc5f49c..e51e462bc8fdc 100644 --- a/drivers/net/ethernet/broadcom/genet/bcmgenet.c +++ b/drivers/net/ethernet/broadcom/genet/bcmgenet.c @@ -2535,6 +2535,9 @@ static int bcmgenet_probe(struct platform_device *pdev) netif_set_real_num_tx_queues(priv->dev, priv->hw_params->tx_queues + 1); netif_set_real_num_rx_queues(priv->dev, priv->hw_params->rx_queues + 1); + /* libphy will determine the link state */ + netif_carrier_off(dev); + /* Turn off the main clock, WOL clock is handled separately */ if (!IS_ERR(priv->clk)) clk_disable_unprepare(priv->clk); From b758858c5ceb1b30ae7d04dea6c74821bd7c7d69 Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Thu, 26 Jun 2014 10:26:22 -0700 Subject: [PATCH 050/268] net: bcmgenet: do not set packet length for RX buffers Hardware will provide this information as soon as we will start processing incoming packets, so there is no need to set the RX buffer length during buffer allocation. Signed-off-by: Florian Fainelli Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/genet/bcmgenet.c | 7 ------- 1 file changed, 7 deletions(-) diff --git a/drivers/net/ethernet/broadcom/genet/bcmgenet.c b/drivers/net/ethernet/broadcom/genet/bcmgenet.c index e51e462bc8fdc..16281ad2da12c 100644 --- a/drivers/net/ethernet/broadcom/genet/bcmgenet.c +++ b/drivers/net/ethernet/broadcom/genet/bcmgenet.c @@ -1408,13 +1408,6 @@ static int bcmgenet_alloc_rx_buffers(struct bcmgenet_priv *priv) if (cb->skb) continue; - /* set the DMA descriptor length once and for all - * it will only change if we support dynamically sizing - * priv->rx_buf_len, but we do not - */ - dmadesc_set_length_status(priv, priv->rx_bd_assign_ptr, - priv->rx_buf_len << DMA_BUFLENGTH_SHIFT); - ret = bcmgenet_rx_refill(priv, cb); if (ret) break; From b292d7a10487aee6e74b1c18b8d95b92f40d4a4f Mon Sep 17 00:00:00 2001 From: HATAYAMA Daisuke Date: Wed, 25 Jun 2014 10:09:07 +0900 Subject: [PATCH 051/268] perf/x86/intel: ignore CondChgd bit to avoid false NMI handling MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Currently, any NMI is falsely handled by a NMI handler of NMI watchdog if CondChgd bit in MSR_CORE_PERF_GLOBAL_STATUS MSR is set. For example, we use external NMI to make system panic to get crash dump, but in this case, the external NMI is falsely handled do to the issue. This commit deals with the issue simply by ignoring CondChgd bit. Here is explanation in detail. On x86 NMI watchdog uses performance monitoring feature to periodically signal NMI each time performance counter gets overflowed. intel_pmu_handle_irq() is called as a NMI_LOCAL handler from a NMI handler of NMI watchdog, perf_event_nmi_handler(). It identifies an owner of a given NMI by looking at overflow status bits in MSR_CORE_PERF_GLOBAL_STATUS MSR. If some of the bits are set, then it handles the given NMI as its own NMI. The problem is that the intel_pmu_handle_irq() doesn't distinguish CondChgd bit from other bits. Unlike the other status bits, CondChgd bit doesn't represent overflow status for performance counters. Thus, CondChgd bit cannot be thought of as a mark indicating a given NMI is NMI watchdog's. As a result, if CondChgd bit is set, any NMI is falsely handled by the NMI handler of NMI watchdog. Also, if type of the falsely handled NMI is either NMI_UNKNOWN, NMI_SERR or NMI_IO_CHECK, the corresponding action is never performed until CondChgd bit is cleared. I noticed this behavior on systems with Ivy Bridge processors: Intel Xeon CPU E5-2630 v2 and Intel Xeon CPU E7-8890 v2. On both systems, CondChgd bit in MSR_CORE_PERF_GLOBAL_STATUS MSR has already been set in the beginning at boot. Then the CondChgd bit is immediately cleared by next wrmsr to MSR_CORE_PERF_GLOBAL_CTRL MSR and appears to remain 0. On the other hand, on older processors such as Nehalem, Xeon E7540, CondChgd bit is not set in the beginning at boot. I'm not sure about exact behavior of CondChgd bit, in particular when this bit is set. Although I read Intel System Programmer's Manual to figure out that, the descriptions I found are: In 18.9.1: "The MSR_PERF_GLOBAL_STATUS MSR also provides a ¡sticky bit¢ to indicate changes to the state of performancmonitoring hardware" In Table 35-2 IA-32 Architectural MSRs 63 CondChg: status bits of this register has changed. These are different from the bahviour I see on the actual system as I explained above. At least, I think ignoring CondChgd bit should be enough for NMI watchdog perspective. Signed-off-by: HATAYAMA Daisuke Acked-by: Don Zickus Signed-off-by: Peter Zijlstra Cc: Cc: Arnaldo Carvalho de Melo Cc: Linus Torvalds Cc: linux-kernel@vger.kernel.org Link: http://lkml.kernel.org/r/20140625.103503.409316067.d.hatayama@jp.fujitsu.com Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/perf_event_intel.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c index adb02aa62af5e..07846d738bdb0 100644 --- a/arch/x86/kernel/cpu/perf_event_intel.c +++ b/arch/x86/kernel/cpu/perf_event_intel.c @@ -1381,6 +1381,15 @@ static int intel_pmu_handle_irq(struct pt_regs *regs) intel_pmu_lbr_read(); + /* + * CondChgd bit 63 doesn't mean any overflow status. Ignore + * and clear the bit. + */ + if (__test_and_clear_bit(63, (unsigned long *)&status)) { + if (!status) + goto done; + } + /* * PEBS overflow sets bit 62 in the global status register */ From 1f9a7268c67f0290837aada443d28fd953ddca90 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Tue, 24 Jun 2014 10:20:25 +0200 Subject: [PATCH 052/268] perf: Do not allow optimized switch for non-cloned events The context check in perf_event_context_sched_out allows non-cloned context to be part of the optimized schedule out switch. This could move non-cloned context into another workload child. Once this child exits, the context is closed and leaves all original (parent) events in closed state. Any other new cloned event will have closed state and not measure anything. And probably causing other odd bugs. Signed-off-by: Jiri Olsa Signed-off-by: Peter Zijlstra Cc: Cc: Arnaldo Carvalho de Melo Cc: Paul Mackerras Cc: Frederic Weisbecker Cc: Namhyung Kim Cc: Paul Mackerras Cc: Corey Ashford Cc: David Ahern Cc: Jiri Olsa Cc: Linus Torvalds Link: http://lkml.kernel.org/r/1403598026-2310-2-git-send-email-jolsa@kernel.org Signed-off-by: Ingo Molnar --- kernel/events/core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/events/core.c b/kernel/events/core.c index a33d9a2bcbd73..b0c95f0f06fd3 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -2320,7 +2320,7 @@ static void perf_event_context_sched_out(struct task_struct *task, int ctxn, next_parent = rcu_dereference(next_ctx->parent_ctx); /* If neither context have a parent context; they cannot be clones. */ - if (!parent && !next_parent) + if (!parent || !next_parent) goto unlock; if (next_parent == ctx || next_ctx == parent || next_parent == parent) { From d9daa24720891a88bedb93928f57767da96e5c80 Mon Sep 17 00:00:00 2001 From: Daniel Mack Date: Sat, 28 Jun 2014 01:23:35 +0200 Subject: [PATCH 053/268] net: fix circular dependency in of_mdio code Commit 86f6cf4127 (net: of_mdio: add of_mdiobus_link_phydev()) introduced a circular dependency between libphy and of_mdio. depmod: ERROR: /kernel/drivers/net/phy/libphy.ko in dependency cycle! depmod: ERROR: /kernel/drivers/of/of_mdio.ko in dependency cycle! The problem is that of_mdio.c references &mdio_bus_type and libphy now references of_mdiobus_link_phydev. Fix this by not exporting of_mdiobus_link_phydev() from of_mdio.ko. Make it a static function in mdio_bus.c instead. Signed-off-by: Daniel Mack Reported-by: Jeff Mahoney Fixes: 86f6cf4127 (net: of_mdio: add of_mdiobus_link_phydev()) Reviewed-by: Florian Fainelli Signed-off-by: David S. Miller --- drivers/net/phy/mdio_bus.c | 44 ++++++++++++++++++++++++++++++++++++++ drivers/of/of_mdio.c | 34 ----------------------------- include/linux/of_mdio.h | 8 ------- 3 files changed, 44 insertions(+), 42 deletions(-) diff --git a/drivers/net/phy/mdio_bus.c b/drivers/net/phy/mdio_bus.c index 2e58aa54484c9..4eaadcfcb0fe5 100644 --- a/drivers/net/phy/mdio_bus.c +++ b/drivers/net/phy/mdio_bus.c @@ -187,6 +187,50 @@ struct mii_bus *of_mdio_find_bus(struct device_node *mdio_bus_np) return d ? to_mii_bus(d) : NULL; } EXPORT_SYMBOL(of_mdio_find_bus); + +/* Walk the list of subnodes of a mdio bus and look for a node that matches the + * phy's address with its 'reg' property. If found, set the of_node pointer for + * the phy. This allows auto-probed pyh devices to be supplied with information + * passed in via DT. + */ +static void of_mdiobus_link_phydev(struct mii_bus *mdio, + struct phy_device *phydev) +{ + struct device *dev = &phydev->dev; + struct device_node *child; + + if (dev->of_node || !mdio->dev.of_node) + return; + + for_each_available_child_of_node(mdio->dev.of_node, child) { + int addr; + int ret; + + ret = of_property_read_u32(child, "reg", &addr); + if (ret < 0) { + dev_err(dev, "%s has invalid PHY address\n", + child->full_name); + continue; + } + + /* A PHY must have a reg property in the range [0-31] */ + if (addr >= PHY_MAX_ADDR) { + dev_err(dev, "%s PHY address %i is too large\n", + child->full_name, addr); + continue; + } + + if (addr == phydev->addr) { + dev->of_node = child; + return; + } + } +} +#else /* !IS_ENABLED(CONFIG_OF_MDIO) */ +static inline void of_mdiobus_link_phydev(struct mii_bus *mdio, + struct phy_device *phydev) +{ +} #endif /** diff --git a/drivers/of/of_mdio.c b/drivers/of/of_mdio.c index a3bf2122a8d5b..401b2453da45a 100644 --- a/drivers/of/of_mdio.c +++ b/drivers/of/of_mdio.c @@ -182,40 +182,6 @@ int of_mdiobus_register(struct mii_bus *mdio, struct device_node *np) } EXPORT_SYMBOL(of_mdiobus_register); -/** - * of_mdiobus_link_phydev - Find a device node for a phy - * @mdio: pointer to mii_bus structure - * @phydev: phydev for which the of_node pointer should be set - * - * Walk the list of subnodes of a mdio bus and look for a node that matches the - * phy's address with its 'reg' property. If found, set the of_node pointer for - * the phy. This allows auto-probed pyh devices to be supplied with information - * passed in via DT. - */ -void of_mdiobus_link_phydev(struct mii_bus *mdio, - struct phy_device *phydev) -{ - struct device *dev = &phydev->dev; - struct device_node *child; - - if (dev->of_node || !mdio->dev.of_node) - return; - - for_each_available_child_of_node(mdio->dev.of_node, child) { - int addr; - - addr = of_mdio_parse_addr(&mdio->dev, child); - if (addr < 0) - continue; - - if (addr == phydev->addr) { - dev->of_node = child; - return; - } - } -} -EXPORT_SYMBOL(of_mdiobus_link_phydev); - /* Helper function for of_phy_find_device */ static int of_phy_match(struct device *dev, void *phy_np) { diff --git a/include/linux/of_mdio.h b/include/linux/of_mdio.h index a70c9493d55a4..d449018d07265 100644 --- a/include/linux/of_mdio.h +++ b/include/linux/of_mdio.h @@ -25,9 +25,6 @@ struct phy_device *of_phy_attach(struct net_device *dev, extern struct mii_bus *of_mdio_find_bus(struct device_node *mdio_np); -extern void of_mdiobus_link_phydev(struct mii_bus *mdio, - struct phy_device *phydev); - #else /* CONFIG_OF */ static inline int of_mdiobus_register(struct mii_bus *mdio, struct device_node *np) { @@ -63,11 +60,6 @@ static inline struct mii_bus *of_mdio_find_bus(struct device_node *mdio_np) { return NULL; } - -static inline void of_mdiobus_link_phydev(struct mii_bus *mdio, - struct phy_device *phydev) -{ -} #endif /* CONFIG_OF */ #if defined(CONFIG_OF) && defined(CONFIG_FIXED_PHY) From a48e5fafecfb9c0c807d7e7284b5ff884dfb7a3a Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 2 Jul 2014 02:25:15 -0700 Subject: [PATCH 054/268] vlan: free percpu stats in device destructor Madalin-Cristian reported crashs happening after a recent commit (5a4ae5f6e7d4 "vlan: unnecessary to check if vlan_pcpu_stats is NULL") ----------------------------------------------------------------------- root@p5040ds:~# vconfig add eth8 1 root@p5040ds:~# vconfig rem eth8.1 Unable to handle kernel paging request for data at address 0x2bc88028 Faulting instruction address: 0xc058e950 Oops: Kernel access of bad area, sig: 11 [#1] SMP NR_CPUS=8 CoreNet Generic Modules linked in: CPU: 3 PID: 2167 Comm: vconfig Tainted: G W 3.16.0-rc3-00346-g65e85bf #2 task: e7264d90 ti: e2c2c000 task.ti: e2c2c000 NIP: c058e950 LR: c058ea30 CTR: c058e900 REGS: e2c2db20 TRAP: 0300 Tainted: G W (3.16.0-rc3-00346-g65e85bf) MSR: 00029002 CR: 48000428 XER: 20000000 DEAR: 2bc88028 ESR: 00000000 GPR00: c047299c e2c2dbd0 e7264d90 00000000 2bc88000 00000000 ffffffff 00000000 GPR08: 0000000f 00000000 000000ff 00000000 28000422 10121928 10100000 10100000 GPR16: 10100000 00000000 c07c5968 00000000 00000000 00000000 e2c2dc48 e7838000 GPR24: c07c5bac c07c58a8 e77290cc c07b0000 00000000 c05de6c0 e7838000 e2c2dc48 NIP [c058e950] vlan_dev_get_stats64+0x50/0x170 LR [c058ea30] vlan_dev_get_stats64+0x130/0x170 Call Trace: [e2c2dbd0] [ffffffea] 0xffffffea (unreliable) [e2c2dc20] [c047299c] dev_get_stats+0x4c/0x140 [e2c2dc40] [c0488ca8] rtnl_fill_ifinfo+0x3d8/0x960 [e2c2dd70] [c0489f4c] rtmsg_ifinfo+0x6c/0x110 [e2c2dd90] [c04731d4] rollback_registered_many+0x344/0x3b0 [e2c2ddd0] [c047332c] rollback_registered+0x2c/0x50 [e2c2ddf0] [c0476058] unregister_netdevice_queue+0x78/0xf0 [e2c2de00] [c058d800] unregister_vlan_dev+0xc0/0x160 [e2c2de20] [c058e360] vlan_ioctl_handler+0x1c0/0x550 [e2c2de90] [c045d11c] sock_ioctl+0x28c/0x2f0 [e2c2deb0] [c010d070] do_vfs_ioctl+0x90/0x7b0 [e2c2df20] [c010d7d0] SyS_ioctl+0x40/0x80 [e2c2df40] [c000f924] ret_from_syscall+0x0/0x3c Fix this problem by freeing percpu stats from dev->destructor() instead of ndo_uninit() Reported-by: Madalin-Cristian Bucur Signed-off-by: Eric Dumazet Tested-by: Madalin-Cristian Bucur Fixes: 5a4ae5f6e7d4 ("vlan: unnecessary to check if vlan_pcpu_stats is NULL") Cc: Li RongQing Signed-off-by: David S. Miller --- net/8021q/vlan_dev.c | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c index ad2ac3c003988..dd11f612e03e4 100644 --- a/net/8021q/vlan_dev.c +++ b/net/8021q/vlan_dev.c @@ -627,8 +627,6 @@ static void vlan_dev_uninit(struct net_device *dev) struct vlan_dev_priv *vlan = vlan_dev_priv(dev); int i; - free_percpu(vlan->vlan_pcpu_stats); - vlan->vlan_pcpu_stats = NULL; for (i = 0; i < ARRAY_SIZE(vlan->egress_priority_map); i++) { while ((pm = vlan->egress_priority_map[i]) != NULL) { vlan->egress_priority_map[i] = pm->next; @@ -785,6 +783,15 @@ static const struct net_device_ops vlan_netdev_ops = { .ndo_get_lock_subclass = vlan_dev_get_lock_subclass, }; +static void vlan_dev_free(struct net_device *dev) +{ + struct vlan_dev_priv *vlan = vlan_dev_priv(dev); + + free_percpu(vlan->vlan_pcpu_stats); + vlan->vlan_pcpu_stats = NULL; + free_netdev(dev); +} + void vlan_setup(struct net_device *dev) { ether_setup(dev); @@ -794,7 +801,7 @@ void vlan_setup(struct net_device *dev) dev->tx_queue_len = 0; dev->netdev_ops = &vlan_netdev_ops; - dev->destructor = free_netdev; + dev->destructor = vlan_dev_free; dev->ethtool_ops = &vlan_ethtool_ops; memset(dev->broadcast, 0, ETH_ALEN); From 5925a0555bdaf0b396a84318cbc21ba085f6c0d3 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 2 Jul 2014 02:39:38 -0700 Subject: [PATCH 055/268] net: fix sparse warning in sk_dst_set() sk_dst_cache has __rcu annotation, so we need a cast to avoid following sparse error : include/net/sock.h:1774:19: warning: incorrect type in initializer (different address spaces) include/net/sock.h:1774:19: expected struct dst_entry [noderef] *__ret include/net/sock.h:1774:19: got struct dst_entry *dst Signed-off-by: Eric Dumazet Reported-by: kbuild test robot Fixes: 7f502361531e ("ipv4: irq safe sk_dst_[re]set() and ipv4_sk_update_pmtu() fix") Signed-off-by: David S. Miller --- include/net/sock.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/net/sock.h b/include/net/sock.h index c556fd9b05acf..1563507457002 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -1771,7 +1771,7 @@ sk_dst_set(struct sock *sk, struct dst_entry *dst) struct dst_entry *old_dst; sk_tx_queue_clear(sk); - old_dst = xchg(&sk->sk_dst_cache, dst); + old_dst = xchg((__force struct dst_entry **)&sk->sk_dst_cache, dst); dst_release(old_dst); } From 5924f17a8a30c2ae18d034a86ee7581b34accef6 Mon Sep 17 00:00:00 2001 From: Christoph Paasch Date: Sat, 28 Jun 2014 18:26:37 +0200 Subject: [PATCH 056/268] tcp: Fix divide by zero when pushing during tcp-repair When in repair-mode and TCP_RECV_QUEUE is set, we end up calling tcp_push with mss_now being 0. If data is in the send-queue and tcp_set_skb_tso_segs gets called, we crash because it will divide by mss_now: [ 347.151939] divide error: 0000 [#1] SMP [ 347.152907] Modules linked in: [ 347.152907] CPU: 1 PID: 1123 Comm: packetdrill Not tainted 3.16.0-rc2 #4 [ 347.152907] Hardware name: Bochs Bochs, BIOS Bochs 01/01/2007 [ 347.152907] task: f5b88540 ti: f3c82000 task.ti: f3c82000 [ 347.152907] EIP: 0060:[] EFLAGS: 00210246 CPU: 1 [ 347.152907] EIP is at tcp_set_skb_tso_segs+0x49/0xa0 [ 347.152907] EAX: 00000b67 EBX: f5acd080 ECX: 00000000 EDX: 00000000 [ 347.152907] ESI: f5a28f40 EDI: f3c88f00 EBP: f3c83d10 ESP: f3c83d00 [ 347.152907] DS: 007b ES: 007b FS: 00d8 GS: 0033 SS: 0068 [ 347.152907] CR0: 80050033 CR2: 083158b0 CR3: 35146000 CR4: 000006b0 [ 347.152907] Stack: [ 347.152907] c167f9d9 f5acd080 000005b4 00000002 f3c83d20 c16013e6 f3c88f00 f5acd080 [ 347.152907] f3c83da0 c1603b5a f3c83d38 c10a0188 00000000 00000000 f3c83d84 c10acc85 [ 347.152907] c1ad5ec0 00000000 00000000 c1ad679c 010003e0 00000000 00000000 f3c88fc8 [ 347.152907] Call Trace: [ 347.152907] [] ? apic_timer_interrupt+0x2d/0x34 [ 347.152907] [] tcp_init_tso_segs+0x36/0x50 [ 347.152907] [] tcp_write_xmit+0x7a/0xbf0 [ 347.152907] [] ? up+0x28/0x40 [ 347.152907] [] ? console_unlock+0x295/0x480 [ 347.152907] [] ? vprintk_emit+0x1ef/0x4b0 [ 347.152907] [] __tcp_push_pending_frames+0x36/0xd0 [ 347.152907] [] tcp_push+0xf0/0x120 [ 347.152907] [] tcp_sendmsg+0xf1/0xbf0 [ 347.152907] [] ? kmem_cache_free+0xf0/0x120 [ 347.152907] [] ? __sigqueue_free+0x32/0x40 [ 347.152907] [] ? __sigqueue_free+0x32/0x40 [ 347.152907] [] ? do_wp_page+0x3e0/0x850 [ 347.152907] [] inet_sendmsg+0x4a/0xb0 [ 347.152907] [] ? handle_mm_fault+0x709/0xfb0 [ 347.152907] [] sock_aio_write+0xbb/0xd0 [ 347.152907] [] do_sync_write+0x69/0xa0 [ 347.152907] [] vfs_write+0x123/0x160 [ 347.152907] [] SyS_write+0x55/0xb0 [ 347.152907] [] sysenter_do_call+0x12/0x28 This can easily be reproduced with the following packetdrill-script (the "magic" with netem, sk_pacing and limit_output_bytes is done to prevent the kernel from pushing all segments, because hitting the limit without doing this is not so easy with packetdrill): 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 +0 bind(3, ..., ...) = 0 +0 listen(3, 1) = 0 +0 < S 0:0(0) win 32792 +0 > S. 0:0(0) ack 1 +0.1 < . 1:1(0) ack 1 win 65000 +0 accept(3, ..., ...) = 4 // This forces that not all segments of the snd-queue will be pushed +0 `tc qdisc add dev tun0 root netem delay 10ms` +0 `sysctl -w net.ipv4.tcp_limit_output_bytes=2` +0 setsockopt(4, SOL_SOCKET, 47, [2], 4) = 0 +0 write(4,...,10000) = 10000 +0 write(4,...,10000) = 10000 // Set tcp-repair stuff, particularly TCP_RECV_QUEUE +0 setsockopt(4, SOL_TCP, 19, [1], 4) = 0 +0 setsockopt(4, SOL_TCP, 20, [1], 4) = 0 // This now will make the write push the remaining segments +0 setsockopt(4, SOL_SOCKET, 47, [20000], 4) = 0 +0 `sysctl -w net.ipv4.tcp_limit_output_bytes=130000` // Now we will crash +0 write(4,...,1000) = 1000 This happens since ec3423257508 (tcp: fix retransmission in repair mode). Prior to that, the call to tcp_push was prevented by a check for tp->repair. The patch fixes it, by adding the new goto-label out_nopush. When exiting tcp_sendmsg and a push is not required, which is the case for tp->repair, we go to this label. When repairing and calling send() with TCP_RECV_QUEUE, the data is actually put in the receive-queue. So, no push is required because no data has been added to the send-queue. Cc: Andrew Vagin Cc: Pavel Emelyanov Fixes: ec3423257508 (tcp: fix retransmission in repair mode) Signed-off-by: Christoph Paasch Acked-by: Andrew Vagin Acked-by: Pavel Emelyanov Signed-off-by: David S. Miller --- net/ipv4/tcp.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index eb1dde37e678f..9d2118e5fbc79 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -1108,7 +1108,7 @@ int tcp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, if (unlikely(tp->repair)) { if (tp->repair_queue == TCP_RECV_QUEUE) { copied = tcp_send_rcvq(sk, msg, size); - goto out; + goto out_nopush; } err = -EINVAL; @@ -1282,6 +1282,7 @@ int tcp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, out: if (copied) tcp_push(sk, flags, mss_now, tp->nonagle, size_goal); +out_nopush: release_sock(sk); return copied + copied_syn; From f46d53d0e9151ea9553361d2bf044ba555350e5f Mon Sep 17 00:00:00 2001 From: "Maciej W. Rozycki" Date: Sun, 29 Jun 2014 01:45:53 +0100 Subject: [PATCH 057/268] defxx: Remove an incorrectly inverted preprocessor conditional The RX handler of the driver has two paths switched between, depending on the size of the frame received, as determined by SKBUFF_RX_COPYBREAK. When a small frame is received, a new skb allocated has data space large enough to hold the incoming frame only, and data is copied there from the original skb whose buffer is returned to the DMA RX ring; in that case `rx_in_place' is 0. When a large frame is received, a new skb allocated has data space large enough to hold the largest frame possible, including the overhead for alignment, the receive status and padding, over 4.5kiB overall, and its buffer is placed on the DMA RX ring while the original buffer is passed up to the network stack avoiding the need to copy data; in that case `rx_in_place' is 1. However the latter scenario is only possible when dynamic buffers are used, as determined by DYNAMIC_BUFFERS, because otherwise the buffers used for the DMA RX ring are fixed at the time the interface is brought up. That leads to an observation that the preprocessor conditional around the `rx_in_place' check is inverted, the check only really matters when dynamic buffers are in use. It has gone unnoticed for many years since support for using dynamic buffers on the DMA RX ring was introduced in 2.1.40 -- because the only problem that results is in the case where `rx_in_place' is 1 frame data received is unnecessarily copied to the newly-allocated buffer, before the buffer placed on the the DMA receive RX and its contents ignored. Therefore the only symptom is some performance loss. Rather than flipping the condition though I decided to discard the conditional altogether -- in the case of static buffers `rx_in_place' is always 0 so GCC will optimise the C conditional away instead. Tested on a few DEFPA and DEFTA boards successfully using both small and large frames, both with DYNAMIC_BUFFERS defined and with the macro undefined. Signed-off-by: Maciej W. Rozycki Signed-off-by: David S. Miller --- drivers/net/fddi/defxx.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/drivers/net/fddi/defxx.c b/drivers/net/fddi/defxx.c index eb78203cd58e2..052927be074b3 100644 --- a/drivers/net/fddi/defxx.c +++ b/drivers/net/fddi/defxx.c @@ -3074,10 +3074,7 @@ static void dfx_rcv_queue_process( break; } else { -#ifndef DYNAMIC_BUFFERS - if (! rx_in_place) -#endif - { + if (!rx_in_place) { /* Receive buffer allocated, pass receive packet up */ skb_copy_to_linear_data(skb, From 1b037474d0c0b5ceb65bc809e3d8ac4497ee041b Mon Sep 17 00:00:00 2001 From: "Maciej W. Rozycki" Date: Sun, 29 Jun 2014 02:09:19 +0100 Subject: [PATCH 058/268] defxx: Fix !DYNAMIC_BUFFERS compilation warnings This fixes compilation warnings: drivers/net/fddi/defxx.c:294: warning: 'dfx_rcv_flush' declared inline after being called drivers/net/fddi/defxx.c:294: warning: previous declaration of 'dfx_rcv_flush' was here drivers/net/fddi/defxx.c:2854: warning: 'my_skb_align' defined but not used triggered when the driver is built with DYNAMIC_BUFFERS undefined. Code tested to work just fine with these changes and a few DEFPA and DEFTA boards. Signed-off-by: Maciej W. Rozycki Signed-off-by: David S. Miller --- drivers/net/fddi/defxx.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/net/fddi/defxx.c b/drivers/net/fddi/defxx.c index 052927be074b3..2aa57270838fb 100644 --- a/drivers/net/fddi/defxx.c +++ b/drivers/net/fddi/defxx.c @@ -291,7 +291,11 @@ static int dfx_hw_dma_uninit(DFX_board_t *bp, PI_UINT32 type); static int dfx_rcv_init(DFX_board_t *bp, int get_buffers); static void dfx_rcv_queue_process(DFX_board_t *bp); +#ifdef DYNAMIC_BUFFERS static void dfx_rcv_flush(DFX_board_t *bp); +#else +static inline void dfx_rcv_flush(DFX_board_t *bp) {} +#endif static netdev_tx_t dfx_xmt_queue_pkt(struct sk_buff *skb, struct net_device *dev); @@ -2849,7 +2853,7 @@ static int dfx_hw_dma_uninit(DFX_board_t *bp, PI_UINT32 type) * Align an sk_buff to a boundary power of 2 * */ - +#ifdef DYNAMIC_BUFFERS static void my_skb_align(struct sk_buff *skb, int n) { unsigned long x = (unsigned long)skb->data; @@ -2859,7 +2863,7 @@ static void my_skb_align(struct sk_buff *skb, int n) skb_reserve(skb, v - x); } - +#endif /* * ================ @@ -3450,10 +3454,6 @@ static void dfx_rcv_flush( DFX_board_t *bp ) } } -#else -static inline void dfx_rcv_flush( DFX_board_t *bp ) -{ -} #endif /* DYNAMIC_BUFFERS */ /* From 35f6f45368632f21bd27559c44dbb1cab51d8947 Mon Sep 17 00:00:00 2001 From: Amir Vadai Date: Sun, 29 Jun 2014 11:54:55 +0300 Subject: [PATCH 059/268] net/mlx4_en: Don't use irq_affinity_notifier to track changes in IRQ affinity map IRQ affinity notifier can only have a single notifier - cpu_rmap notifier. Can't use it to track changes in IRQ affinity map. Detect IRQ affinity changes by comparing CPU to current IRQ affinity map during NAPI poll thread. CC: Thomas Gleixner CC: Ben Hutchings Fixes: 2eacc23 ("net/mlx4_core: Enforce irq affinity changes immediatly") Signed-off-by: Amir Vadai Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlx4/cq.c | 2 - drivers/net/ethernet/mellanox/mlx4/en_cq.c | 4 ++ drivers/net/ethernet/mellanox/mlx4/en_rx.c | 16 ++++- drivers/net/ethernet/mellanox/mlx4/en_tx.c | 6 -- drivers/net/ethernet/mellanox/mlx4/eq.c | 69 +++----------------- drivers/net/ethernet/mellanox/mlx4/mlx4_en.h | 1 + include/linux/mlx4/device.h | 4 +- 7 files changed, 28 insertions(+), 74 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx4/cq.c b/drivers/net/ethernet/mellanox/mlx4/cq.c index 80f725228f5b7..56022d6478370 100644 --- a/drivers/net/ethernet/mellanox/mlx4/cq.c +++ b/drivers/net/ethernet/mellanox/mlx4/cq.c @@ -294,8 +294,6 @@ int mlx4_cq_alloc(struct mlx4_dev *dev, int nent, init_completion(&cq->free); cq->irq = priv->eq_table.eq[cq->vector].irq; - cq->irq_affinity_change = false; - return 0; err_radix: diff --git a/drivers/net/ethernet/mellanox/mlx4/en_cq.c b/drivers/net/ethernet/mellanox/mlx4/en_cq.c index 4b2130760eede..1213cc71348c3 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_cq.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_cq.c @@ -128,6 +128,10 @@ int mlx4_en_activate_cq(struct mlx4_en_priv *priv, struct mlx4_en_cq *cq, mlx4_warn(mdev, "Failed assigning an EQ to %s, falling back to legacy EQ's\n", name); } + + cq->irq_desc = + irq_to_desc(mlx4_eq_get_irq(mdev->dev, + cq->vector)); } } else { cq->vector = (cq->ring + 1 + priv->port) % diff --git a/drivers/net/ethernet/mellanox/mlx4/en_rx.c b/drivers/net/ethernet/mellanox/mlx4/en_rx.c index d2d415732d994..96724170308a3 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_rx.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_rx.c @@ -40,6 +40,7 @@ #include #include #include +#include #include "mlx4_en.h" @@ -896,16 +897,25 @@ int mlx4_en_poll_rx_cq(struct napi_struct *napi, int budget) /* If we used up all the quota - we're probably not done yet... */ if (done == budget) { + int cpu_curr; + const struct cpumask *aff; + INC_PERF_COUNTER(priv->pstats.napi_quota); - if (unlikely(cq->mcq.irq_affinity_change)) { - cq->mcq.irq_affinity_change = false; + + cpu_curr = smp_processor_id(); + aff = irq_desc_get_irq_data(cq->irq_desc)->affinity; + + if (unlikely(!cpumask_test_cpu(cpu_curr, aff))) { + /* Current cpu is not according to smp_irq_affinity - + * probably affinity changed. need to stop this NAPI + * poll, and restart it on the right CPU + */ napi_complete(napi); mlx4_en_arm_cq(priv, cq); return 0; } } else { /* Done for now */ - cq->mcq.irq_affinity_change = false; napi_complete(napi); mlx4_en_arm_cq(priv, cq); } diff --git a/drivers/net/ethernet/mellanox/mlx4/en_tx.c b/drivers/net/ethernet/mellanox/mlx4/en_tx.c index 8be7483f82368..ac3dead3792cf 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_tx.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_tx.c @@ -474,15 +474,9 @@ int mlx4_en_poll_tx_cq(struct napi_struct *napi, int budget) /* If we used up all the quota - we're probably not done yet... */ if (done < budget) { /* Done for now */ - cq->mcq.irq_affinity_change = false; napi_complete(napi); mlx4_en_arm_cq(priv, cq); return done; - } else if (unlikely(cq->mcq.irq_affinity_change)) { - cq->mcq.irq_affinity_change = false; - napi_complete(napi); - mlx4_en_arm_cq(priv, cq); - return 0; } return budget; } diff --git a/drivers/net/ethernet/mellanox/mlx4/eq.c b/drivers/net/ethernet/mellanox/mlx4/eq.c index d954ec1eac173..2a004b347e1dd 100644 --- a/drivers/net/ethernet/mellanox/mlx4/eq.c +++ b/drivers/net/ethernet/mellanox/mlx4/eq.c @@ -53,11 +53,6 @@ enum { MLX4_EQ_ENTRY_SIZE = 0x20 }; -struct mlx4_irq_notify { - void *arg; - struct irq_affinity_notify notify; -}; - #define MLX4_EQ_STATUS_OK ( 0 << 28) #define MLX4_EQ_STATUS_WRITE_FAIL (10 << 28) #define MLX4_EQ_OWNER_SW ( 0 << 24) @@ -1088,57 +1083,6 @@ static void mlx4_unmap_clr_int(struct mlx4_dev *dev) iounmap(priv->clr_base); } -static void mlx4_irq_notifier_notify(struct irq_affinity_notify *notify, - const cpumask_t *mask) -{ - struct mlx4_irq_notify *n = container_of(notify, - struct mlx4_irq_notify, - notify); - struct mlx4_priv *priv = (struct mlx4_priv *)n->arg; - struct radix_tree_iter iter; - void **slot; - - radix_tree_for_each_slot(slot, &priv->cq_table.tree, &iter, 0) { - struct mlx4_cq *cq = (struct mlx4_cq *)(*slot); - - if (cq->irq == notify->irq) - cq->irq_affinity_change = true; - } -} - -static void mlx4_release_irq_notifier(struct kref *ref) -{ - struct mlx4_irq_notify *n = container_of(ref, struct mlx4_irq_notify, - notify.kref); - kfree(n); -} - -static void mlx4_assign_irq_notifier(struct mlx4_priv *priv, - struct mlx4_dev *dev, int irq) -{ - struct mlx4_irq_notify *irq_notifier = NULL; - int err = 0; - - irq_notifier = kzalloc(sizeof(*irq_notifier), GFP_KERNEL); - if (!irq_notifier) { - mlx4_warn(dev, "Failed to allocate irq notifier. irq %d\n", - irq); - return; - } - - irq_notifier->notify.irq = irq; - irq_notifier->notify.notify = mlx4_irq_notifier_notify; - irq_notifier->notify.release = mlx4_release_irq_notifier; - irq_notifier->arg = priv; - err = irq_set_affinity_notifier(irq, &irq_notifier->notify); - if (err) { - kfree(irq_notifier); - irq_notifier = NULL; - mlx4_warn(dev, "Failed to set irq notifier. irq %d\n", irq); - } -} - - int mlx4_alloc_eq_table(struct mlx4_dev *dev) { struct mlx4_priv *priv = mlx4_priv(dev); @@ -1409,8 +1353,6 @@ int mlx4_assign_eq(struct mlx4_dev *dev, char *name, struct cpu_rmap *rmap, continue; /*we dont want to break here*/ } - mlx4_assign_irq_notifier(priv, dev, - priv->eq_table.eq[vec].irq); eq_set_ci(&priv->eq_table.eq[vec], 1); } @@ -1427,6 +1369,14 @@ int mlx4_assign_eq(struct mlx4_dev *dev, char *name, struct cpu_rmap *rmap, } EXPORT_SYMBOL(mlx4_assign_eq); +int mlx4_eq_get_irq(struct mlx4_dev *dev, int vec) +{ + struct mlx4_priv *priv = mlx4_priv(dev); + + return priv->eq_table.eq[vec].irq; +} +EXPORT_SYMBOL(mlx4_eq_get_irq); + void mlx4_release_eq(struct mlx4_dev *dev, int vec) { struct mlx4_priv *priv = mlx4_priv(dev); @@ -1438,9 +1388,6 @@ void mlx4_release_eq(struct mlx4_dev *dev, int vec) Belonging to a legacy EQ*/ mutex_lock(&priv->msix_ctl.pool_lock); if (priv->msix_ctl.pool_bm & 1ULL << i) { - irq_set_affinity_notifier( - priv->eq_table.eq[vec].irq, - NULL); free_irq(priv->eq_table.eq[vec].irq, &priv->eq_table.eq[vec]); priv->msix_ctl.pool_bm &= ~(1ULL << i); diff --git a/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h b/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h index 0e15295bedd67..624e1939e9ee3 100644 --- a/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h +++ b/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h @@ -343,6 +343,7 @@ struct mlx4_en_cq { #define CQ_USER_PEND (MLX4_EN_CQ_STATE_POLL | MLX4_EN_CQ_STATE_POLL_YIELD) spinlock_t poll_lock; /* protects from LLS/napi conflicts */ #endif /* CONFIG_NET_RX_BUSY_POLL */ + struct irq_desc *irq_desc; }; struct mlx4_en_port_profile { diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h index b12f4bbd064ce..35b51e7af8865 100644 --- a/include/linux/mlx4/device.h +++ b/include/linux/mlx4/device.h @@ -578,8 +578,6 @@ struct mlx4_cq { u32 cons_index; u16 irq; - bool irq_affinity_change; - __be32 *set_ci_db; __be32 *arm_db; int arm_sn; @@ -1167,6 +1165,8 @@ int mlx4_assign_eq(struct mlx4_dev *dev, char *name, struct cpu_rmap *rmap, int *vector); void mlx4_release_eq(struct mlx4_dev *dev, int vec); +int mlx4_eq_get_irq(struct mlx4_dev *dev, int vec); + int mlx4_get_phys_port_id(struct mlx4_dev *dev); int mlx4_wol_read(struct mlx4_dev *dev, u64 *config, int port); int mlx4_wol_write(struct mlx4_dev *dev, u64 config, int port); From 143b5ba21b2bd5091cd8dcd92de7ba1ed1d1c83c Mon Sep 17 00:00:00 2001 From: Amir Vadai Date: Sun, 29 Jun 2014 11:54:56 +0300 Subject: [PATCH 060/268] lib/cpumask: cpumask_set_cpu_local_first to use all cores when numa node is not defined When device is non numa aware (numa_node == -1), use all online cpu's. Signed-off-by: Amir Vadai Signed-off-by: David S. Miller --- lib/cpumask.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/cpumask.c b/lib/cpumask.c index c101230658ebc..b6513a9f28920 100644 --- a/lib/cpumask.c +++ b/lib/cpumask.c @@ -191,7 +191,7 @@ int cpumask_set_cpu_local_first(int i, int numa_node, cpumask_t *dstp) i %= num_online_cpus(); - if (!cpumask_of_node(numa_node)) { + if (numa_node == -1 || !cpumask_of_node(numa_node)) { /* Use all online cpu's for non numa aware system */ cpumask_copy(mask, cpu_online_mask); } else { From bb273617a65b9ed75f8cf9417206cbfcfb41fc48 Mon Sep 17 00:00:00 2001 From: Amir Vadai Date: Sun, 29 Jun 2014 11:54:57 +0300 Subject: [PATCH 061/268] net/mlx4_en: IRQ affinity hint is not cleared on port down Need to remove affinity hint at mlx4_en_deactivate_cq() and not at mlx4_en_destroy_cq() - since affinity_mask might be free'd while still being used by procfs. Signed-off-by: Amir Vadai Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlx4/en_cq.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx4/en_cq.c b/drivers/net/ethernet/mellanox/mlx4/en_cq.c index 1213cc71348c3..14c00048bbec6 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_cq.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_cq.c @@ -191,8 +191,6 @@ void mlx4_en_destroy_cq(struct mlx4_en_priv *priv, struct mlx4_en_cq **pcq) mlx4_en_unmap_buffer(&cq->wqres.buf); mlx4_free_hwq_res(mdev->dev, &cq->wqres, cq->buf_size); if (priv->mdev->dev->caps.comp_pool && cq->vector) { - if (!cq->is_tx) - irq_set_affinity_hint(cq->mcq.irq, NULL); mlx4_release_eq(priv->mdev->dev, cq->vector); } cq->vector = 0; @@ -208,6 +206,7 @@ void mlx4_en_deactivate_cq(struct mlx4_en_priv *priv, struct mlx4_en_cq *cq) if (!cq->is_tx) { napi_hash_del(&cq->napi); synchronize_rcu(); + irq_set_affinity_hint(cq->mcq.irq, NULL); } netif_napi_del(&cq->napi); From 48bc03433cfdcac7a3bbb233d5c9f95297a0f5ab Mon Sep 17 00:00:00 2001 From: Alexander Aring Date: Sun, 29 Jun 2014 13:10:18 +0200 Subject: [PATCH 062/268] ieee802154: reassembly: fix possible buffer overflow The max_dsize attribute in ctl_table for lowpan_frags_ns_ctl_table is configured with integer accessing methods. This patch change the max_dsize attribute to int to avoid a possible buffer overflow. Signed-off-by: Alexander Aring Signed-off-by: David S. Miller --- include/net/netns/ieee802154_6lowpan.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/net/netns/ieee802154_6lowpan.h b/include/net/netns/ieee802154_6lowpan.h index 079030c853d85..e2070960bac00 100644 --- a/include/net/netns/ieee802154_6lowpan.h +++ b/include/net/netns/ieee802154_6lowpan.h @@ -16,7 +16,7 @@ struct netns_sysctl_lowpan { struct netns_ieee802154_lowpan { struct netns_sysctl_lowpan sysctl; struct netns_frags frags; - u16 max_dsize; + int max_dsize; }; #endif From 0acf16768740776feffac506ce93b1c06c059ac6 Mon Sep 17 00:00:00 2001 From: Vince Bridgers Date: Sun, 29 Jun 2014 20:34:51 -0500 Subject: [PATCH 063/268] net: stmmac: add platform init/exit for Altera's ARM socfpga This patch adds platform init/exit functions and modifications to support suspend/resume for the Altera Cyclone 5 SOC Ethernet controller. The platform exit function puts the controller into reset using the socfpga reset controller driver. The platform init function sets up the Synopsys mac by first making sure the Ethernet controller is held in reset, programming the phy mode through external support logic, then deasserts reset through the socfpga reset manager driver. Signed-off-by: Vince Bridgers Signed-off-by: David S. Miller --- .../ethernet/stmicro/stmmac/dwmac-socfpga.c | 69 +++++++++++++++++++ .../net/ethernet/stmicro/stmmac/stmmac_main.c | 4 ++ 2 files changed, 73 insertions(+) diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-socfpga.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-socfpga.c index fd8a217556a14..ec632e666c56a 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-socfpga.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-socfpga.c @@ -20,7 +20,9 @@ #include #include #include +#include #include +#include "stmmac.h" #define SYSMGR_EMACGRP_CTRL_PHYSEL_ENUM_GMII_MII 0x0 #define SYSMGR_EMACGRP_CTRL_PHYSEL_ENUM_RGMII 0x1 @@ -34,6 +36,7 @@ struct socfpga_dwmac { u32 reg_shift; struct device *dev; struct regmap *sys_mgr_base_addr; + struct reset_control *stmmac_rst; }; static int socfpga_dwmac_parse_data(struct socfpga_dwmac *dwmac, struct device *dev) @@ -43,6 +46,13 @@ static int socfpga_dwmac_parse_data(struct socfpga_dwmac *dwmac, struct device * u32 reg_offset, reg_shift; int ret; + dwmac->stmmac_rst = devm_reset_control_get(dev, + STMMAC_RESOURCE_NAME); + if (IS_ERR(dwmac->stmmac_rst)) { + dev_info(dev, "Could not get reset control!\n"); + return -EINVAL; + } + dwmac->interface = of_get_phy_mode(np); sys_mgr_base_addr = syscon_regmap_lookup_by_phandle(np, "altr,sysmgr-syscon"); @@ -125,6 +135,65 @@ static void *socfpga_dwmac_probe(struct platform_device *pdev) return dwmac; } +static void socfpga_dwmac_exit(struct platform_device *pdev, void *priv) +{ + struct socfpga_dwmac *dwmac = priv; + + /* On socfpga platform exit, assert and hold reset to the + * enet controller - the default state after a hard reset. + */ + if (dwmac->stmmac_rst) + reset_control_assert(dwmac->stmmac_rst); +} + +static int socfpga_dwmac_init(struct platform_device *pdev, void *priv) +{ + struct socfpga_dwmac *dwmac = priv; + struct net_device *ndev = platform_get_drvdata(pdev); + struct stmmac_priv *stpriv = NULL; + int ret = 0; + + if (ndev) + stpriv = netdev_priv(ndev); + + /* Assert reset to the enet controller before changing the phy mode */ + if (dwmac->stmmac_rst) + reset_control_assert(dwmac->stmmac_rst); + + /* Setup the phy mode in the system manager registers according to + * devicetree configuration + */ + ret = socfpga_dwmac_setup(dwmac); + + /* Deassert reset for the phy configuration to be sampled by + * the enet controller, and operation to start in requested mode + */ + if (dwmac->stmmac_rst) + reset_control_deassert(dwmac->stmmac_rst); + + /* Before the enet controller is suspended, the phy is suspended. + * This causes the phy clock to be gated. The enet controller is + * resumed before the phy, so the clock is still gated "off" when + * the enet controller is resumed. This code makes sure the phy + * is "resumed" before reinitializing the enet controller since + * the enet controller depends on an active phy clock to complete + * a DMA reset. A DMA reset will "time out" if executed + * with no phy clock input on the Synopsys enet controller. + * Verified through Synopsys Case #8000711656. + * + * Note that the phy clock is also gated when the phy is isolated. + * Phy "suspend" and "isolate" controls are located in phy basic + * control register 0, and can be modified by the phy driver + * framework. + */ + if (stpriv && stpriv->phydev) + phy_resume(stpriv->phydev); + + return ret; +} + const struct stmmac_of_data socfpga_gmac_data = { .setup = socfpga_dwmac_probe, + .init = socfpga_dwmac_init, + .exit = socfpga_dwmac_exit, }; diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c index 057a1208e5947..18315f34938bd 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c @@ -2878,6 +2878,10 @@ int stmmac_suspend(struct net_device *ndev) clk_disable_unprepare(priv->stmmac_clk); } spin_unlock_irqrestore(&priv->lock, flags); + + priv->oldlink = 0; + priv->speed = 0; + priv->oldduplex = -1; return 0; } From 43d24e48940d04f587818fadc5305b109f5cb8cf Mon Sep 17 00:00:00 2001 From: Vince Bridgers Date: Sun, 29 Jun 2014 20:34:52 -0500 Subject: [PATCH 064/268] net: stmmac: Correct duplicate if/then/else case found by cppcheck Cppcheck found a duplicate if/then/else case where a receive descriptor was being processed. This patch corrects that issue. cppcheck --force --enable=all --inline-suppr . ... Checking enh_desc.c... [enh_desc.c:148] -> [enh_desc.c:144]: (style) Found duplicate if expressions. ... Signed-off-by: Vince Bridgers Signed-off-by: David S. Miller --- drivers/net/ethernet/stmicro/stmmac/enh_desc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/enh_desc.c b/drivers/net/ethernet/stmicro/stmmac/enh_desc.c index 7e6628a91514d..1e2bcf5f89e13 100644 --- a/drivers/net/ethernet/stmicro/stmmac/enh_desc.c +++ b/drivers/net/ethernet/stmicro/stmmac/enh_desc.c @@ -145,7 +145,7 @@ static void enh_desc_get_ext_status(void *data, struct stmmac_extra_stats *x, x->rx_msg_type_delay_req++; else if (p->des4.erx.msg_type == RDES_EXT_DELAY_RESP) x->rx_msg_type_delay_resp++; - else if (p->des4.erx.msg_type == RDES_EXT_DELAY_REQ) + else if (p->des4.erx.msg_type == RDES_EXT_PDELAY_REQ) x->rx_msg_type_pdelay_req++; else if (p->des4.erx.msg_type == RDES_EXT_PDELAY_RESP) x->rx_msg_type_pdelay_resp++; From c8df8ce3ee5ff24993bba9033e7d13b16fa3809c Mon Sep 17 00:00:00 2001 From: Vince Bridgers Date: Sun, 29 Jun 2014 20:34:53 -0500 Subject: [PATCH 065/268] net: stmmac: Remove unneeded I/O read caught by cppcheck Cppcheck found a case where a local variable was being assigned a value, but not used. There seems to be no reason to read this register before assigning a new value, so addressing thie issue. cppcheck --force --enable=all --inline-suppr . shows ... Variable 'value' is reassigned a value before the old one has been used. Signed-off-by: Vince Bridgers Signed-off-by: David S. Miller --- drivers/net/ethernet/stmicro/stmmac/dwmac1000_core.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac1000_core.c b/drivers/net/ethernet/stmicro/stmmac/dwmac1000_core.c index b3e148ef56839..9d3748361a1e5 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac1000_core.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac1000_core.c @@ -320,11 +320,8 @@ static void dwmac1000_set_eee_timer(void __iomem *ioaddr, int ls, int tw) static void dwmac1000_ctrl_ane(void __iomem *ioaddr, bool restart) { - u32 value; - - value = readl(ioaddr + GMAC_AN_CTRL); /* auto negotiation enable and External Loopback enable */ - value = GMAC_AN_CTRL_ANE | GMAC_AN_CTRL_ELE; + u32 value = GMAC_AN_CTRL_ANE | GMAC_AN_CTRL_ELE; if (restart) value |= GMAC_AN_CTRL_RAN; From 1b6478231c6f5f844185acb32045cf195028cfce Mon Sep 17 00:00:00 2001 From: David Vrabel Date: Wed, 2 Jul 2014 17:25:23 +0100 Subject: [PATCH 066/268] xen/manage: fix potential deadlock when resuming the console Calling xen_console_resume() in xen_suspend() causes a warning because it locks irq_mapping_update_lock (a mutex) and this may sleep. If a userspace process is using the evtchn device then this mutex may be locked at the point of the stop_machine() call and xen_console_resume() would then deadlock. Resuming the console after stop_machine() returns avoids this deadlock. Signed-off-by: David Vrabel Reviewed-by: Boris Ostrovsky Cc: --- drivers/xen/manage.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/xen/manage.c b/drivers/xen/manage.c index c3667b202f2f5..5f1e1f3cd1861 100644 --- a/drivers/xen/manage.c +++ b/drivers/xen/manage.c @@ -88,7 +88,6 @@ static int xen_suspend(void *data) if (!si->cancelled) { xen_irq_resume(); - xen_console_resume(); xen_timer_resume(); } @@ -135,6 +134,10 @@ static void do_suspend(void) err = stop_machine(xen_suspend, &si, cpumask_of(0)); + /* Resume console as early as possible. */ + if (!si.cancelled) + xen_console_resume(); + raw_notifier_call_chain(&xen_resume_notifier, 0, NULL); dpm_resume_start(si.cancelled ? PMSG_THAW : PMSG_RESTORE); From 43d826ca5979927131685cc2092c7ce862cb91cd Mon Sep 17 00:00:00 2001 From: Emmanuel Grumbach Date: Wed, 25 Jun 2014 09:12:30 +0300 Subject: [PATCH 067/268] iwlwifi: dvm: don't enable CTS to self We should always prefer to use full RTS protection. Using CTS to self gives a meaningless improvement, but this flow is much harder for the firmware which is likely to have issues with it. CC: Signed-off-by: Emmanuel Grumbach --- drivers/net/wireless/iwlwifi/dvm/rxon.c | 12 ------------ 1 file changed, 12 deletions(-) diff --git a/drivers/net/wireless/iwlwifi/dvm/rxon.c b/drivers/net/wireless/iwlwifi/dvm/rxon.c index ed50de6362ed1..6dc5dd3ced447 100644 --- a/drivers/net/wireless/iwlwifi/dvm/rxon.c +++ b/drivers/net/wireless/iwlwifi/dvm/rxon.c @@ -1068,13 +1068,6 @@ int iwlagn_commit_rxon(struct iwl_priv *priv, struct iwl_rxon_context *ctx) /* recalculate basic rates */ iwl_calc_basic_rates(priv, ctx); - /* - * force CTS-to-self frames protection if RTS-CTS is not preferred - * one aggregation protection method - */ - if (!priv->hw_params.use_rts_for_aggregation) - ctx->staging.flags |= RXON_FLG_SELF_CTS_EN; - if ((ctx->vif && ctx->vif->bss_conf.use_short_slot) || !(ctx->staging.flags & RXON_FLG_BAND_24G_MSK)) ctx->staging.flags |= RXON_FLG_SHORT_SLOT_MSK; @@ -1480,11 +1473,6 @@ void iwlagn_bss_info_changed(struct ieee80211_hw *hw, else ctx->staging.flags &= ~RXON_FLG_TGG_PROTECT_MSK; - if (bss_conf->use_cts_prot) - ctx->staging.flags |= RXON_FLG_SELF_CTS_EN; - else - ctx->staging.flags &= ~RXON_FLG_SELF_CTS_EN; - memcpy(ctx->staging.bssid_addr, bss_conf->bssid, ETH_ALEN); if (vif->type == NL80211_IFTYPE_AP || From dc271ee0d04d12d6bfabacbec803289a7072fbd9 Mon Sep 17 00:00:00 2001 From: Emmanuel Grumbach Date: Thu, 3 Jul 2014 20:46:35 +0300 Subject: [PATCH 068/268] iwlwifi: mvm: disable CTS to Self Firmware folks seem say that this flag can make trouble. Drop it. The advantage of CTS to self is that it slightly reduces the cost of the protection, but make the protection less reliable. Cc: [3.13+] Signed-off-by: Emmanuel Grumbach --- drivers/net/wireless/iwlwifi/mvm/mac-ctxt.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/net/wireless/iwlwifi/mvm/mac-ctxt.c b/drivers/net/wireless/iwlwifi/mvm/mac-ctxt.c index 8b53027776325..725ba49576bf6 100644 --- a/drivers/net/wireless/iwlwifi/mvm/mac-ctxt.c +++ b/drivers/net/wireless/iwlwifi/mvm/mac-ctxt.c @@ -667,10 +667,9 @@ static void iwl_mvm_mac_ctxt_cmd_common(struct iwl_mvm *mvm, if (vif->bss_conf.qos) cmd->qos_flags |= cpu_to_le32(MAC_QOS_FLG_UPDATE_EDCA); - if (vif->bss_conf.use_cts_prot) { + if (vif->bss_conf.use_cts_prot) cmd->protection_flags |= cpu_to_le32(MAC_PROT_FLG_TGG_PROTECT); - cmd->protection_flags |= cpu_to_le32(MAC_PROT_FLG_SELF_CTS_EN); - } + IWL_DEBUG_RATE(mvm, "use_cts_prot %d, ht_operation_mode %d\n", vif->bss_conf.use_cts_prot, vif->bss_conf.ht_operation_mode); From 701a9e619347ac06d59481db14bbc4df99763270 Mon Sep 17 00:00:00 2001 From: Amitkumar Karwar Date: Tue, 1 Jul 2014 14:39:04 -0700 Subject: [PATCH 069/268] mwifiex: initialize Tx/Rx info of a packet correctly There are few places at the begining of Tx/Rx paths where tx_info/rx_info is not correctly initialized. This patch takes care of it. Signed-off-by: Amitkumar Karwar Signed-off-by: Avinash Patil Signed-off-by: Bing Zhao Signed-off-by: John W. Linville --- drivers/net/wireless/mwifiex/11n_aggr.c | 1 + drivers/net/wireless/mwifiex/cfg80211.c | 1 + drivers/net/wireless/mwifiex/cmdevt.c | 1 + drivers/net/wireless/mwifiex/sta_tx.c | 1 + drivers/net/wireless/mwifiex/tdls.c | 2 ++ drivers/net/wireless/mwifiex/txrx.c | 1 + drivers/net/wireless/mwifiex/uap_txrx.c | 1 + 7 files changed, 8 insertions(+) diff --git a/drivers/net/wireless/mwifiex/11n_aggr.c b/drivers/net/wireless/mwifiex/11n_aggr.c index 5b32106182f81..fe0f66f735076 100644 --- a/drivers/net/wireless/mwifiex/11n_aggr.c +++ b/drivers/net/wireless/mwifiex/11n_aggr.c @@ -185,6 +185,7 @@ mwifiex_11n_aggregate_pkt(struct mwifiex_private *priv, skb_reserve(skb_aggr, headroom + sizeof(struct txpd)); tx_info_aggr = MWIFIEX_SKB_TXCB(skb_aggr); + memset(tx_info_aggr, 0, sizeof(*tx_info_aggr)); tx_info_aggr->bss_type = tx_info_src->bss_type; tx_info_aggr->bss_num = tx_info_src->bss_num; diff --git a/drivers/net/wireless/mwifiex/cfg80211.c b/drivers/net/wireless/mwifiex/cfg80211.c index e95dec91a561e..b511613bba2d8 100644 --- a/drivers/net/wireless/mwifiex/cfg80211.c +++ b/drivers/net/wireless/mwifiex/cfg80211.c @@ -220,6 +220,7 @@ mwifiex_cfg80211_mgmt_tx(struct wiphy *wiphy, struct wireless_dev *wdev, } tx_info = MWIFIEX_SKB_TXCB(skb); + memset(tx_info, 0, sizeof(*tx_info)); tx_info->bss_num = priv->bss_num; tx_info->bss_type = priv->bss_type; tx_info->pkt_len = pkt_len; diff --git a/drivers/net/wireless/mwifiex/cmdevt.c b/drivers/net/wireless/mwifiex/cmdevt.c index 8dee6c86f4f1d..c161141f6c39e 100644 --- a/drivers/net/wireless/mwifiex/cmdevt.c +++ b/drivers/net/wireless/mwifiex/cmdevt.c @@ -453,6 +453,7 @@ int mwifiex_process_event(struct mwifiex_adapter *adapter) if (skb) { rx_info = MWIFIEX_SKB_RXCB(skb); + memset(rx_info, 0, sizeof(*rx_info)); rx_info->bss_num = priv->bss_num; rx_info->bss_type = priv->bss_type; } diff --git a/drivers/net/wireless/mwifiex/sta_tx.c b/drivers/net/wireless/mwifiex/sta_tx.c index 5fce7e78a36e7..70eb863c72497 100644 --- a/drivers/net/wireless/mwifiex/sta_tx.c +++ b/drivers/net/wireless/mwifiex/sta_tx.c @@ -150,6 +150,7 @@ int mwifiex_send_null_packet(struct mwifiex_private *priv, u8 flags) return -1; tx_info = MWIFIEX_SKB_TXCB(skb); + memset(tx_info, 0, sizeof(*tx_info)); tx_info->bss_num = priv->bss_num; tx_info->bss_type = priv->bss_type; tx_info->pkt_len = data_len - (sizeof(struct txpd) + INTF_HEADER_LEN); diff --git a/drivers/net/wireless/mwifiex/tdls.c b/drivers/net/wireless/mwifiex/tdls.c index e73034fbbde92..0e88364e0c670 100644 --- a/drivers/net/wireless/mwifiex/tdls.c +++ b/drivers/net/wireless/mwifiex/tdls.c @@ -605,6 +605,7 @@ int mwifiex_send_tdls_data_frame(struct mwifiex_private *priv, const u8 *peer, } tx_info = MWIFIEX_SKB_TXCB(skb); + memset(tx_info, 0, sizeof(*tx_info)); tx_info->bss_num = priv->bss_num; tx_info->bss_type = priv->bss_type; @@ -760,6 +761,7 @@ int mwifiex_send_tdls_action_frame(struct mwifiex_private *priv, const u8 *peer, skb->priority = MWIFIEX_PRIO_VI; tx_info = MWIFIEX_SKB_TXCB(skb); + memset(tx_info, 0, sizeof(*tx_info)); tx_info->bss_num = priv->bss_num; tx_info->bss_type = priv->bss_type; tx_info->flags |= MWIFIEX_BUF_FLAG_TDLS_PKT; diff --git a/drivers/net/wireless/mwifiex/txrx.c b/drivers/net/wireless/mwifiex/txrx.c index 37f26afd43143..fd7e5b9b4581f 100644 --- a/drivers/net/wireless/mwifiex/txrx.c +++ b/drivers/net/wireless/mwifiex/txrx.c @@ -55,6 +55,7 @@ int mwifiex_handle_rx_packet(struct mwifiex_adapter *adapter, return -1; } + memset(rx_info, 0, sizeof(*rx_info)); rx_info->bss_num = priv->bss_num; rx_info->bss_type = priv->bss_type; diff --git a/drivers/net/wireless/mwifiex/uap_txrx.c b/drivers/net/wireless/mwifiex/uap_txrx.c index 9a56bc61cb1d2..b0601b91cc4f1 100644 --- a/drivers/net/wireless/mwifiex/uap_txrx.c +++ b/drivers/net/wireless/mwifiex/uap_txrx.c @@ -175,6 +175,7 @@ static void mwifiex_uap_queue_bridged_pkt(struct mwifiex_private *priv, } tx_info = MWIFIEX_SKB_TXCB(skb); + memset(tx_info, 0, sizeof(*tx_info)); tx_info->bss_num = priv->bss_num; tx_info->bss_type = priv->bss_type; tx_info->flags |= MWIFIEX_BUF_FLAG_BRIDGED_PKT; From fb9a0c443691ceaab3daba966bbbd9f5ff3aa26f Mon Sep 17 00:00:00 2001 From: David Vrabel Date: Fri, 27 Jun 2014 10:42:03 +0100 Subject: [PATCH 070/268] xen/balloon: set ballooned out pages as invalid in p2m Since cd9151e26d31048b2b5e00fd02e110e07d2200c9 (xen/balloon: set a mapping for ballooned out pages), a ballooned out page had its entry in the p2m set to the MFN of one of the scratch pages. This means that the p2m will contain many entries pointing to the same MFN. During a domain save, these many-to-one entries are not identified as such and the scratch page is saved multiple times. On restore the ballooned pages are populated with new frames and the domain may use up its allocation before all pages can be restored. Since the original fix only needed to keep a mapping for the ballooned page it is safe to set ballooned out pages as INVALID_P2M_ENTRY in the p2m (as they were before). Thus preventing them from being saved and re-populated on restore. Signed-off-by: David Vrabel Reported-by: Marek Marczykowski Tested-by: Marek Marczykowski Acked-by: Stefano Stabellini Cc: --- drivers/xen/balloon.c | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/drivers/xen/balloon.c b/drivers/xen/balloon.c index b7a506f2bb144..5c660c77f03b5 100644 --- a/drivers/xen/balloon.c +++ b/drivers/xen/balloon.c @@ -426,20 +426,18 @@ static enum bp_state decrease_reservation(unsigned long nr_pages, gfp_t gfp) * p2m are consistent. */ if (!xen_feature(XENFEAT_auto_translated_physmap)) { - unsigned long p; - struct page *scratch_page = get_balloon_scratch_page(); - if (!PageHighMem(page)) { + struct page *scratch_page = get_balloon_scratch_page(); + ret = HYPERVISOR_update_va_mapping( (unsigned long)__va(pfn << PAGE_SHIFT), pfn_pte(page_to_pfn(scratch_page), PAGE_KERNEL_RO), 0); BUG_ON(ret); - } - p = page_to_pfn(scratch_page); - __set_phys_to_machine(pfn, pfn_to_mfn(p)); - put_balloon_scratch_page(); + put_balloon_scratch_page(); + } + __set_phys_to_machine(pfn, INVALID_P2M_ENTRY); } #endif From 1cbbf90d0406913ad4b44194b07f4f41bde84e54 Mon Sep 17 00:00:00 2001 From: Antti Palosaari Date: Tue, 24 Jun 2014 10:03:59 -0300 Subject: [PATCH 071/268] [media] af9035: override tuner id when bad value set into eeprom Tuner ID set into EEPROM is wrong in some cases, which causes driver to select wrong tuner profile. That leads device non-working. Fix issue by overriding known bad tuner IDs with suitable default value. Thanks to MX-NET Telekomunikace s.r.o. for providing non-working DTV stick, that I could fix the bug! Cc: stable@vger.kernel.org # v3.15+ Signed-off-by: Antti Palosaari Signed-off-by: Mauro Carvalho Chehab --- drivers/media/usb/dvb-usb-v2/af9035.c | 40 ++++++++++++++++++++++----- 1 file changed, 33 insertions(+), 7 deletions(-) diff --git a/drivers/media/usb/dvb-usb-v2/af9035.c b/drivers/media/usb/dvb-usb-v2/af9035.c index 021e4d35e4d7d..7b9b75f607747 100644 --- a/drivers/media/usb/dvb-usb-v2/af9035.c +++ b/drivers/media/usb/dvb-usb-v2/af9035.c @@ -704,15 +704,41 @@ static int af9035_read_config(struct dvb_usb_device *d) if (ret < 0) goto err; - if (tmp == 0x00) - dev_dbg(&d->udev->dev, - "%s: [%d]tuner not set, using default\n", - __func__, i); - else + dev_dbg(&d->udev->dev, "%s: [%d]tuner=%02x\n", + __func__, i, tmp); + + /* tuner sanity check */ + if (state->chip_type == 0x9135) { + if (state->chip_version == 0x02) { + /* IT9135 BX (v2) */ + switch (tmp) { + case AF9033_TUNER_IT9135_60: + case AF9033_TUNER_IT9135_61: + case AF9033_TUNER_IT9135_62: + state->af9033_config[i].tuner = tmp; + break; + } + } else { + /* IT9135 AX (v1) */ + switch (tmp) { + case AF9033_TUNER_IT9135_38: + case AF9033_TUNER_IT9135_51: + case AF9033_TUNER_IT9135_52: + state->af9033_config[i].tuner = tmp; + break; + } + } + } else { + /* AF9035 */ state->af9033_config[i].tuner = tmp; + } - dev_dbg(&d->udev->dev, "%s: [%d]tuner=%02x\n", - __func__, i, state->af9033_config[i].tuner); + if (state->af9033_config[i].tuner != tmp) { + dev_info(&d->udev->dev, + "%s: [%d] overriding tuner from %02x to %02x\n", + KBUILD_MODNAME, i, tmp, + state->af9033_config[i].tuner); + } switch (state->af9033_config[i].tuner) { case AF9033_TUNER_TUA9001: From 9249196867803227dfb6b5f01f7683ce9b5572fd Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Thu, 12 Jun 2014 04:01:45 -0300 Subject: [PATCH 072/268] [media] davinci: vpif: missing unlocks on error We recently changed some locking around so we need some new unlocks on the error paths. Signed-off-by: Dan Carpenter Signed-off-by: Hans Verkuil Signed-off-by: Mauro Carvalho Chehab --- drivers/media/platform/davinci/vpif_capture.c | 1 + drivers/media/platform/davinci/vpif_display.c | 1 + 2 files changed, 2 insertions(+) diff --git a/drivers/media/platform/davinci/vpif_capture.c b/drivers/media/platform/davinci/vpif_capture.c index a7ed16497903e..1e4ec697fb105 100644 --- a/drivers/media/platform/davinci/vpif_capture.c +++ b/drivers/media/platform/davinci/vpif_capture.c @@ -269,6 +269,7 @@ static int vpif_start_streaming(struct vb2_queue *vq, unsigned int count) list_del(&buf->list); vb2_buffer_done(&buf->vb, VB2_BUF_STATE_QUEUED); } + spin_unlock_irqrestore(&common->irqlock, flags); return ret; } diff --git a/drivers/media/platform/davinci/vpif_display.c b/drivers/media/platform/davinci/vpif_display.c index 5bb085b19bcbd..b431b58f39e36 100644 --- a/drivers/media/platform/davinci/vpif_display.c +++ b/drivers/media/platform/davinci/vpif_display.c @@ -233,6 +233,7 @@ static int vpif_start_streaming(struct vb2_queue *vq, unsigned int count) list_del(&buf->list); vb2_buffer_done(&buf->vb, VB2_BUF_STATE_QUEUED); } + spin_unlock_irqrestore(&common->irqlock, flags); return ret; } From 3445857b22eafb70a6ac258979e955b116bfd2c6 Mon Sep 17 00:00:00 2001 From: Hans Verkuil Date: Mon, 16 Jun 2014 09:08:29 -0300 Subject: [PATCH 073/268] [media] hdpvr: fix two audio bugs When the audio encoding is changed the driver calls hdpvr_set_audio with the current opt->audio_input value. However, that should have been opt->audio_input + 1. So changing the audio encoding inadvertently changes the input as well. This bug has always been there. The second bug was introduced in kernel 3.10 and that broke the default_audio_input module option handling: the audio encoding was never switched to AC3 if default_audio_input was set to 2 (SPDIF input). In addition, since starting with 3.10 the audio encoding is always set at the start the first bug now always happens when the driver is loaded. In the past this bug would only surface if the user would change the audio encoding after the driver was loaded. Also fixes a small trivial typo (bufffer -> buffer). Signed-off-by: Hans Verkuil Reported-by: Scott Doty Cc: stable@vger.kernel.org # for v3.10 and up Signed-off-by: Mauro Carvalho Chehab --- drivers/media/usb/hdpvr/hdpvr-video.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/media/usb/hdpvr/hdpvr-video.c b/drivers/media/usb/hdpvr/hdpvr-video.c index 0500c4175d5f0..6bce01a674f9e 100644 --- a/drivers/media/usb/hdpvr/hdpvr-video.c +++ b/drivers/media/usb/hdpvr/hdpvr-video.c @@ -82,7 +82,7 @@ static void hdpvr_read_bulk_callback(struct urb *urb) } /*=========================================================================*/ -/* bufffer bits */ +/* buffer bits */ /* function expects dev->io_mutex to be hold by caller */ int hdpvr_cancel_queue(struct hdpvr_device *dev) @@ -926,7 +926,7 @@ static int hdpvr_s_ctrl(struct v4l2_ctrl *ctrl) case V4L2_CID_MPEG_AUDIO_ENCODING: if (dev->flags & HDPVR_FLAG_AC3_CAP) { opt->audio_codec = ctrl->val; - return hdpvr_set_audio(dev, opt->audio_input, + return hdpvr_set_audio(dev, opt->audio_input + 1, opt->audio_codec); } return 0; @@ -1198,7 +1198,7 @@ int hdpvr_register_videodev(struct hdpvr_device *dev, struct device *parent, v4l2_ctrl_new_std_menu(hdl, &hdpvr_ctrl_ops, V4L2_CID_MPEG_AUDIO_ENCODING, ac3 ? V4L2_MPEG_AUDIO_ENCODING_AC3 : V4L2_MPEG_AUDIO_ENCODING_AAC, - 0x7, V4L2_MPEG_AUDIO_ENCODING_AAC); + 0x7, ac3 ? dev->options.audio_codec : V4L2_MPEG_AUDIO_ENCODING_AAC); v4l2_ctrl_new_std_menu(hdl, &hdpvr_ctrl_ops, V4L2_CID_MPEG_VIDEO_ENCODING, V4L2_MPEG_VIDEO_ENCODING_MPEG_4_AVC, 0x3, From ef8290ac727b1718417a781412fb6c25e97b5389 Mon Sep 17 00:00:00 2001 From: Michael Welling Date: Tue, 10 Jun 2014 13:46:34 -0500 Subject: [PATCH 074/268] gpio: mcp23s08: Eliminates redundant checking. Unnecessary checking was added during the merge of the gpio branch. This patch removes the extra unnecessary checking. Signed-off-by: Michael Welling Signed-off-by: Linus Walleij --- drivers/gpio/gpio-mcp23s08.c | 6 ------ 1 file changed, 6 deletions(-) diff --git a/drivers/gpio/gpio-mcp23s08.c b/drivers/gpio/gpio-mcp23s08.c index fe7c0e211f9a8..57adbc90fdad8 100644 --- a/drivers/gpio/gpio-mcp23s08.c +++ b/drivers/gpio/gpio-mcp23s08.c @@ -900,8 +900,6 @@ static int mcp23s08_probe(struct spi_device *spi) if (spi_present_mask & (1 << addr)) chips++; } - if (!chips) - return -ENODEV; } else { type = spi_get_device_id(spi)->driver_data; pdata = dev_get_platdata(&spi->dev); @@ -940,10 +938,6 @@ static int mcp23s08_probe(struct spi_device *spi) if (!(spi_present_mask & (1 << addr))) continue; chips--; - if (chips < 0) { - dev_err(&spi->dev, "FATAL: invalid negative chip id\n"); - goto fail; - } data->mcp[addr] = &data->chip[chips]; status = mcp23s08_probe_one(data->mcp[addr], &spi->dev, spi, 0x40 | (addr << 1), type, base, From 6938ad40cb97a52d88a763008935340729a4acc7 Mon Sep 17 00:00:00 2001 From: Ted Juan Date: Fri, 20 Jun 2014 17:32:05 +0800 Subject: [PATCH 075/268] mtd: devices: elm: fix elm_context_save() and elm_context_restore() functions These two function's switch case lack the 'break' that make them always return error. Signed-off-by: Ted Juan Acked-by: Pekon Gupta Cc: # 3.12.x+ Signed-off-by: Brian Norris --- drivers/mtd/devices/elm.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/mtd/devices/elm.c b/drivers/mtd/devices/elm.c index 7df86948e6d40..b4f61c7fc161c 100644 --- a/drivers/mtd/devices/elm.c +++ b/drivers/mtd/devices/elm.c @@ -475,6 +475,7 @@ static int elm_context_save(struct elm_info *info) ELM_SYNDROME_FRAGMENT_1 + offset); regs->elm_syndrome_fragment_0[i] = elm_read_reg(info, ELM_SYNDROME_FRAGMENT_0 + offset); + break; default: return -EINVAL; } @@ -520,6 +521,7 @@ static int elm_context_restore(struct elm_info *info) regs->elm_syndrome_fragment_1[i]); elm_write_reg(info, ELM_SYNDROME_FRAGMENT_0 + offset, regs->elm_syndrome_fragment_0[i]); + break; default: return -EINVAL; } From a55c072dfe520f8fa03cf11b07b9268a8a17820a Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Fri, 13 Jun 2014 13:11:51 +0200 Subject: [PATCH 076/268] efi/arm64: efistub: remove local copy of linux_banner The shared efistub code for ARM and arm64 contains a local copy of linux_banner, allowing it to be referenced from separate executables such as the ARM decompressor. However, this introduces a dependency on generated header files, causing unnecessary rebuilds of the stub itself and, in case of arm64, vmlinux which contains it. On arm64, the copy is not actually needed since we can reference the original symbol directly, and as it turns out, there may be better ways to deal with this for ARM as well, so let's remove it from the shared code. If it still needs to be reintroduced for ARM later, it should live under arch/arm anyway and not in shared code. Signed-off-by: Ard Biesheuvel Acked-by: Will Deacon Signed-off-by: Matt Fleming --- arch/arm64/kernel/efi-stub.c | 2 -- drivers/firmware/efi/fdt.c | 10 ---------- 2 files changed, 12 deletions(-) diff --git a/arch/arm64/kernel/efi-stub.c b/arch/arm64/kernel/efi-stub.c index 60e98a639ac55..e786e6cdc400d 100644 --- a/arch/arm64/kernel/efi-stub.c +++ b/arch/arm64/kernel/efi-stub.c @@ -12,8 +12,6 @@ #include #include #include -#include -#include /* * AArch64 requires the DTB to be 8-byte aligned in the first 512MiB from diff --git a/drivers/firmware/efi/fdt.c b/drivers/firmware/efi/fdt.c index 82d774161cc9d..507a3df46a5da 100644 --- a/drivers/firmware/efi/fdt.c +++ b/drivers/firmware/efi/fdt.c @@ -23,16 +23,6 @@ static efi_status_t update_fdt(efi_system_table_t *sys_table, void *orig_fdt, u32 fdt_val32; u64 fdt_val64; - /* - * Copy definition of linux_banner here. Since this code is - * built as part of the decompressor for ARM v7, pulling - * in version.c where linux_banner is defined for the - * kernel brings other kernel dependencies with it. - */ - const char linux_banner[] = - "Linux version " UTS_RELEASE " (" LINUX_COMPILE_BY "@" - LINUX_COMPILE_HOST ") (" LINUX_COMPILER ") " UTS_VERSION "\n"; - /* Do some checks on provided FDT, if it exists*/ if (orig_fdt) { if (fdt_check_header(orig_fdt)) { From d033f48f3a4a9279c7475891fbb060d4881c22da Mon Sep 17 00:00:00 2001 From: Varun Sethi Date: Tue, 24 Jun 2014 19:27:15 +0530 Subject: [PATCH 077/268] iommu/fsl: Fix PAMU window size check. is_power_of_2 requires an unsigned long parameter which would lead to truncation of 64 bit values on 32 bit architectures. __ffs also expects an unsigned long parameter thus won't work for 64 bit values on 32 bit architectures. Signed-off-by: Varun Sethi Tested-by: Emil Medve Signed-off-by: Joerg Roedel --- drivers/iommu/fsl_pamu.c | 8 ++++---- drivers/iommu/fsl_pamu_domain.c | 2 +- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/iommu/fsl_pamu.c b/drivers/iommu/fsl_pamu.c index b99dd88e31b9b..bb446d742a2d8 100644 --- a/drivers/iommu/fsl_pamu.c +++ b/drivers/iommu/fsl_pamu.c @@ -170,10 +170,10 @@ int pamu_disable_liodn(int liodn) static unsigned int map_addrspace_size_to_wse(phys_addr_t addrspace_size) { /* Bug if not a power of 2 */ - BUG_ON(!is_power_of_2(addrspace_size)); + BUG_ON((addrspace_size & (addrspace_size - 1))); /* window size is 2^(WSE+1) bytes */ - return __ffs(addrspace_size) - 1; + return fls64(addrspace_size) - 2; } /* Derive the PAACE window count encoding for the subwindow count */ @@ -351,7 +351,7 @@ int pamu_config_ppaace(int liodn, phys_addr_t win_addr, phys_addr_t win_size, struct paace *ppaace; unsigned long fspi; - if (!is_power_of_2(win_size) || win_size < PAMU_PAGE_SIZE) { + if ((win_size & (win_size - 1)) || win_size < PAMU_PAGE_SIZE) { pr_debug("window size too small or not a power of two %llx\n", win_size); return -EINVAL; } @@ -464,7 +464,7 @@ int pamu_config_spaace(int liodn, u32 subwin_cnt, u32 subwin, return -ENOENT; } - if (!is_power_of_2(subwin_size) || subwin_size < PAMU_PAGE_SIZE) { + if ((subwin_size & (subwin_size - 1)) || subwin_size < PAMU_PAGE_SIZE) { pr_debug("subwindow size out of range, or not a power of 2\n"); return -EINVAL; } diff --git a/drivers/iommu/fsl_pamu_domain.c b/drivers/iommu/fsl_pamu_domain.c index 93072ba44b1d1..3dd0b8edd4290 100644 --- a/drivers/iommu/fsl_pamu_domain.c +++ b/drivers/iommu/fsl_pamu_domain.c @@ -301,7 +301,7 @@ static int check_size(u64 size, dma_addr_t iova) * Size must be a power of two and at least be equal * to PAMU page size. */ - if (!is_power_of_2(size) || size < PAMU_PAGE_SIZE) { + if ((size & (size - 1)) || size < PAMU_PAGE_SIZE) { pr_debug("%s: size too small or not a power of two\n", __func__); return -EINVAL; } From 75f0e4615dc328e67634eccd86bc71597da9f8a3 Mon Sep 17 00:00:00 2001 From: Varun Sethi Date: Tue, 24 Jun 2014 19:27:16 +0530 Subject: [PATCH 078/268] iommu/fsl: Fix the device domain attach condition. Store the domain information for the device, only if it's not already attached to a domain. Signed-off-by: Varun Sethi Signed-off-by: Joerg Roedel --- drivers/iommu/fsl_pamu_domain.c | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) diff --git a/drivers/iommu/fsl_pamu_domain.c b/drivers/iommu/fsl_pamu_domain.c index 3dd0b8edd4290..54060d16dec88 100644 --- a/drivers/iommu/fsl_pamu_domain.c +++ b/drivers/iommu/fsl_pamu_domain.c @@ -335,11 +335,6 @@ static struct fsl_dma_domain *iommu_alloc_dma_domain(void) return domain; } -static inline struct device_domain_info *find_domain(struct device *dev) -{ - return dev->archdata.iommu_domain; -} - static void remove_device_ref(struct device_domain_info *info, u32 win_cnt) { unsigned long flags; @@ -380,7 +375,7 @@ static void attach_device(struct fsl_dma_domain *dma_domain, int liodn, struct d * Check here if the device is already attached to domain or not. * If the device is already attached to a domain detach it. */ - old_domain_info = find_domain(dev); + old_domain_info = dev->archdata.iommu_domain; if (old_domain_info && old_domain_info->domain != dma_domain) { spin_unlock_irqrestore(&device_domain_lock, flags); detach_device(dev, old_domain_info->domain); @@ -399,7 +394,7 @@ static void attach_device(struct fsl_dma_domain *dma_domain, int liodn, struct d * the info for the first LIODN as all * LIODNs share the same domain */ - if (!old_domain_info) + if (!dev->archdata.iommu_domain) dev->archdata.iommu_domain = info; spin_unlock_irqrestore(&device_domain_lock, flags); From 3170447c1f264d51b8d1f3898bf2588588a64fdc Mon Sep 17 00:00:00 2001 From: Varun Sethi Date: Tue, 24 Jun 2014 19:27:17 +0530 Subject: [PATCH 079/268] iommu/fsl: Fix the error condition during iommu group Earlier PTR_ERR was being returned even if group was set to null. Now, we explicitly set an ERR_PTR value in case the group pointer is NULL. Signed-off-by: Varun Sethi Signed-off-by: Joerg Roedel --- drivers/iommu/fsl_pamu_domain.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/iommu/fsl_pamu_domain.c b/drivers/iommu/fsl_pamu_domain.c index 54060d16dec88..af47648301a9e 100644 --- a/drivers/iommu/fsl_pamu_domain.c +++ b/drivers/iommu/fsl_pamu_domain.c @@ -1037,12 +1037,15 @@ static struct iommu_group *get_pci_device_group(struct pci_dev *pdev) group = get_shared_pci_device_group(pdev); } + if (!group) + group = ERR_PTR(-ENODEV); + return group; } static int fsl_pamu_add_device(struct device *dev) { - struct iommu_group *group = NULL; + struct iommu_group *group = ERR_PTR(-ENODEV); struct pci_dev *pdev; const u32 *prop; int ret, len; @@ -1065,7 +1068,7 @@ static int fsl_pamu_add_device(struct device *dev) group = get_device_iommu_group(dev); } - if (!group || IS_ERR(group)) + if (IS_ERR(group)) return PTR_ERR(group); ret = iommu_group_add_device(group, dev); From 6ed179b67ca1a05034728ab160905900416b1835 Mon Sep 17 00:00:00 2001 From: Anton Blanchard Date: Thu, 12 Jun 2014 18:16:53 +1000 Subject: [PATCH 080/268] KVM: PPC: Assembly functions exported to modules need _GLOBAL_TOC() Both kvmppc_hv_entry_trampoline and kvmppc_entry_trampoline are assembly functions that are exported to modules and also require a valid r2. As such we need to use _GLOBAL_TOC so we provide a global entry point that establishes the TOC (r2). Signed-off-by: Anton Blanchard Signed-off-by: Alexander Graf --- arch/powerpc/kvm/book3s_hv_rmhandlers.S | 2 +- arch/powerpc/kvm/book3s_rmhandlers.S | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S index 77356fd25ccc9..8d9c5d21179d2 100644 --- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S +++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S @@ -48,7 +48,7 @@ * * LR = return address to continue at after eventually re-enabling MMU */ -_GLOBAL(kvmppc_hv_entry_trampoline) +_GLOBAL_TOC(kvmppc_hv_entry_trampoline) mflr r0 std r0, PPC_LR_STKOFF(r1) stdu r1, -112(r1) diff --git a/arch/powerpc/kvm/book3s_rmhandlers.S b/arch/powerpc/kvm/book3s_rmhandlers.S index 9eec675220e62..4850a224e5bf2 100644 --- a/arch/powerpc/kvm/book3s_rmhandlers.S +++ b/arch/powerpc/kvm/book3s_rmhandlers.S @@ -146,7 +146,7 @@ kvmppc_handler_skip_ins: * On entry, r4 contains the guest shadow MSR * MSR.EE has to be 0 when calling this function */ -_GLOBAL(kvmppc_entry_trampoline) +_GLOBAL_TOC(kvmppc_entry_trampoline) mfmsr r5 LOAD_REG_ADDR(r7, kvmppc_handler_trampoline_enter) toreal(r7) From 55ab169b7b9276e6e1e01a88531bcf34803dcde2 Mon Sep 17 00:00:00 2001 From: Alexander Graf Date: Mon, 16 Jun 2014 14:37:53 +0200 Subject: [PATCH 081/268] KVM: PPC: Book3S PR: Fix ABIv2 on LE We switched to ABIv2 on Little Endian systems now which gets rid of the dotted function names. Branch to the actual functions when we see such a system. Signed-off-by: Alexander Graf --- arch/powerpc/kvm/book3s_interrupts.S | 4 ++++ arch/powerpc/kvm/book3s_rmhandlers.S | 4 ++++ 2 files changed, 8 insertions(+) diff --git a/arch/powerpc/kvm/book3s_interrupts.S b/arch/powerpc/kvm/book3s_interrupts.S index e2c29e381dc70..d044b8b7c69dd 100644 --- a/arch/powerpc/kvm/book3s_interrupts.S +++ b/arch/powerpc/kvm/book3s_interrupts.S @@ -25,7 +25,11 @@ #include #if defined(CONFIG_PPC_BOOK3S_64) +#if defined(_CALL_ELF) && _CALL_ELF == 2 +#define FUNC(name) name +#else #define FUNC(name) GLUE(.,name) +#endif #define GET_SHADOW_VCPU(reg) addi reg, r13, PACA_SVCPU #elif defined(CONFIG_PPC_BOOK3S_32) diff --git a/arch/powerpc/kvm/book3s_rmhandlers.S b/arch/powerpc/kvm/book3s_rmhandlers.S index 4850a224e5bf2..16c4d88ba27df 100644 --- a/arch/powerpc/kvm/book3s_rmhandlers.S +++ b/arch/powerpc/kvm/book3s_rmhandlers.S @@ -36,7 +36,11 @@ #if defined(CONFIG_PPC_BOOK3S_64) +#if defined(_CALL_ELF) && _CALL_ELF == 2 +#define FUNC(name) name +#else #define FUNC(name) GLUE(.,name) +#endif #elif defined(CONFIG_PPC_BOOK3S_32) From 525549d7342fca7fca9fc11298b5ab3617b6f730 Mon Sep 17 00:00:00 2001 From: Thierry Reding Date: Mon, 7 Jul 2014 15:13:12 +0200 Subject: [PATCH 082/268] ALSA: hda: Fix build warning The hda_tegra_disable_clocks() function is only used by the suspend and resume code, so it needs to be included in the #ifdef CONFIG_PM_SLEEP block to prevent the following warning: CC sound/pci/hda/hda_tegra.o sound/pci/hda/hda_tegra.c:238:13: warning: 'hda_tegra_disable_clocks' defined but not used [-Wunused-function] static void hda_tegra_disable_clocks(struct hda_tegra *data) ^ Signed-off-by: Thierry Reding Signed-off-by: Takashi Iwai --- sound/pci/hda/hda_tegra.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/pci/hda/hda_tegra.c b/sound/pci/hda/hda_tegra.c index a366ba9293a81..358414da64183 100644 --- a/sound/pci/hda/hda_tegra.c +++ b/sound/pci/hda/hda_tegra.c @@ -236,6 +236,7 @@ static int hda_tegra_enable_clocks(struct hda_tegra *data) return rc; } +#ifdef CONFIG_PM_SLEEP static void hda_tegra_disable_clocks(struct hda_tegra *data) { clk_disable_unprepare(data->hda2hdmi_clk); @@ -243,7 +244,6 @@ static void hda_tegra_disable_clocks(struct hda_tegra *data) clk_disable_unprepare(data->hda_clk); } -#ifdef CONFIG_PM_SLEEP /* * power management */ From 126b9d4365b110c157bc4cbc32540dfa66c9c85a Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Mon, 7 Jul 2014 15:28:50 +0200 Subject: [PATCH 083/268] fuse: timeout comparison fix As suggested by checkpatch.pl, use time_before64() instead of direct comparison of jiffies64 values. Signed-off-by: Miklos Szeredi Cc: --- fs/fuse/dir.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c index 42198359fa1b4..225176203a8ca 100644 --- a/fs/fuse/dir.c +++ b/fs/fuse/dir.c @@ -198,7 +198,7 @@ static int fuse_dentry_revalidate(struct dentry *entry, unsigned int flags) inode = ACCESS_ONCE(entry->d_inode); if (inode && is_bad_inode(inode)) goto invalid; - else if (fuse_dentry_time(entry) < get_jiffies_64()) { + else if (time_before64(fuse_dentry_time(entry), get_jiffies_64())) { int err; struct fuse_entry_out outarg; struct fuse_req *req; @@ -985,7 +985,7 @@ int fuse_update_attributes(struct inode *inode, struct kstat *stat, int err; bool r; - if (fi->i_time < get_jiffies_64()) { + if (time_before64(fi->i_time, get_jiffies_64())) { r = true; err = fuse_do_getattr(inode, stat, file); } else { @@ -1171,7 +1171,7 @@ static int fuse_permission(struct inode *inode, int mask) ((mask & MAY_EXEC) && S_ISREG(inode->i_mode))) { struct fuse_inode *fi = get_fuse_inode(inode); - if (fi->i_time < get_jiffies_64()) { + if (time_before64(fi->i_time, get_jiffies_64())) { refreshed = true; err = fuse_perm_getattr(inode, mask); From 154210ccb3a871e631bf39fdeb7a8731d98af87b Mon Sep 17 00:00:00 2001 From: Anand Avati Date: Thu, 26 Jun 2014 20:21:57 -0400 Subject: [PATCH 084/268] fuse: ignore entry-timeout on LOOKUP_REVAL The following test case demonstrates the bug: sh# mount -t glusterfs localhost:meta-test /mnt/one sh# mount -t glusterfs localhost:meta-test /mnt/two sh# echo stuff > /mnt/one/file; rm -f /mnt/two/file; echo stuff > /mnt/one/file bash: /mnt/one/file: Stale file handle sh# echo stuff > /mnt/one/file; rm -f /mnt/two/file; sleep 1; echo stuff > /mnt/one/file On the second open() on /mnt/one, FUSE would have used the old nodeid (file handle) trying to re-open it. Gluster is returning -ESTALE. The ESTALE propagates back to namei.c:filename_lookup() where lookup is re-attempted with LOOKUP_REVAL. The right behavior now, would be for FUSE to ignore the entry-timeout and and do the up-call revalidation. Instead FUSE is ignoring LOOKUP_REVAL, succeeding the revalidation (because entry-timeout has not passed), and open() is again retried on the old file handle and finally the ESTALE is going back to the application. Fix: if revalidation is happening with LOOKUP_REVAL, then ignore entry-timeout and always do the up-call. Signed-off-by: Anand Avati Reviewed-by: Niels de Vos Signed-off-by: Miklos Szeredi Cc: stable@vger.kernel.org --- fs/fuse/dir.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c index 225176203a8ca..202a9721be931 100644 --- a/fs/fuse/dir.c +++ b/fs/fuse/dir.c @@ -198,7 +198,8 @@ static int fuse_dentry_revalidate(struct dentry *entry, unsigned int flags) inode = ACCESS_ONCE(entry->d_inode); if (inode && is_bad_inode(inode)) goto invalid; - else if (time_before64(fuse_dentry_time(entry), get_jiffies_64())) { + else if (time_before64(fuse_dentry_time(entry), get_jiffies_64()) || + (flags & LOOKUP_REVAL)) { int err; struct fuse_entry_out outarg; struct fuse_req *req; From 7b3d8bf7718a8de8ac6a25ed7332c1c129839400 Mon Sep 17 00:00:00 2001 From: Himangi Saraogi Date: Thu, 26 Jun 2014 23:06:52 +0530 Subject: [PATCH 085/268] fuse: inode: drop cast This patch removes the cast on data of type void * as it is not needed. The following Coccinelle semantic patch was used for making the change: @r@ expression x; void* e; type T; identifier f; @@ ( *((T *)e) | ((T *)x)[...] | ((T *)x)->f | - (T *) e ) Signed-off-by: Himangi Saraogi Acked-by: Julia Lawall Signed-off-by: Miklos Szeredi --- fs/fuse/inode.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c index 754dcf23de8ab..7b8fc7d33deff 100644 --- a/fs/fuse/inode.c +++ b/fs/fuse/inode.c @@ -1006,7 +1006,7 @@ static int fuse_fill_super(struct super_block *sb, void *data, int silent) sb->s_flags &= ~(MS_NOSEC | MS_I_VERSION); - if (!parse_fuse_opt((char *) data, &d, is_bdev)) + if (!parse_fuse_opt(data, &d, is_bdev)) goto err; if (is_bdev) { From 233a01fa9c4c7c41238537e8db8434667ff28a2f Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Mon, 7 Jul 2014 15:28:51 +0200 Subject: [PATCH 086/268] fuse: handle large user and group ID If the number in "user_id=N" or "group_id=N" mount options was larger than INT_MAX then fuse returned EINVAL. Fix this to handle all valid uid/gid values. Signed-off-by: Miklos Szeredi Cc: stable@vger.kernel.org --- fs/fuse/inode.c | 20 ++++++++++++++++---- 1 file changed, 16 insertions(+), 4 deletions(-) diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c index 7b8fc7d33deff..8474028d78480 100644 --- a/fs/fuse/inode.c +++ b/fs/fuse/inode.c @@ -478,6 +478,17 @@ static const match_table_t tokens = { {OPT_ERR, NULL} }; +static int fuse_match_uint(substring_t *s, unsigned int *res) +{ + int err = -ENOMEM; + char *buf = match_strdup(s); + if (buf) { + err = kstrtouint(buf, 10, res); + kfree(buf); + } + return err; +} + static int parse_fuse_opt(char *opt, struct fuse_mount_data *d, int is_bdev) { char *p; @@ -488,6 +499,7 @@ static int parse_fuse_opt(char *opt, struct fuse_mount_data *d, int is_bdev) while ((p = strsep(&opt, ",")) != NULL) { int token; int value; + unsigned uv; substring_t args[MAX_OPT_ARGS]; if (!*p) continue; @@ -511,18 +523,18 @@ static int parse_fuse_opt(char *opt, struct fuse_mount_data *d, int is_bdev) break; case OPT_USER_ID: - if (match_int(&args[0], &value)) + if (fuse_match_uint(&args[0], &uv)) return 0; - d->user_id = make_kuid(current_user_ns(), value); + d->user_id = make_kuid(current_user_ns(), uv); if (!uid_valid(d->user_id)) return 0; d->user_id_present = 1; break; case OPT_GROUP_ID: - if (match_int(&args[0], &value)) + if (fuse_match_uint(&args[0], &uv)) return 0; - d->group_id = make_kgid(current_user_ns(), value); + d->group_id = make_kgid(current_user_ns(), uv); if (!gid_valid(d->group_id)) return 0; d->group_id_present = 1; From c55a01d360afafcd52bc405c044a6ebf5de436d5 Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Mon, 7 Jul 2014 15:28:51 +0200 Subject: [PATCH 087/268] fuse: avoid scheduling while atomic As reported by Richard Sharpe, an attempt to use fuse_notify_inval_entry() triggers complains about scheduling while atomic: BUG: scheduling while atomic: fuse.hf/13976/0x10000001 This happens because fuse_notify_inval_entry() attempts to allocate memory with GFP_KERNEL, holding "struct fuse_copy_state" mapped by kmap_atomic(). Introduced by commit 58bda1da4b3c "fuse/dev: use atomic maps" Fix by moving the map/unmap to just cover the actual memcpy operation. Original patch from Maxim Patlasov Reported-by: Richard Sharpe Signed-off-by: Miklos Szeredi Cc: # v3.15+ --- fs/fuse/dev.c | 51 +++++++++++++++++++++++---------------------------- 1 file changed, 23 insertions(+), 28 deletions(-) diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c index aac71ce373e44..75fa055012b22 100644 --- a/fs/fuse/dev.c +++ b/fs/fuse/dev.c @@ -643,9 +643,8 @@ struct fuse_copy_state { unsigned long seglen; unsigned long addr; struct page *pg; - void *mapaddr; - void *buf; unsigned len; + unsigned offset; unsigned move_pages:1; }; @@ -666,23 +665,17 @@ static void fuse_copy_finish(struct fuse_copy_state *cs) if (cs->currbuf) { struct pipe_buffer *buf = cs->currbuf; - if (!cs->write) { - kunmap_atomic(cs->mapaddr); - } else { - kunmap_atomic(cs->mapaddr); + if (cs->write) buf->len = PAGE_SIZE - cs->len; - } cs->currbuf = NULL; - cs->mapaddr = NULL; - } else if (cs->mapaddr) { - kunmap_atomic(cs->mapaddr); + } else if (cs->pg) { if (cs->write) { flush_dcache_page(cs->pg); set_page_dirty_lock(cs->pg); } put_page(cs->pg); - cs->mapaddr = NULL; } + cs->pg = NULL; } /* @@ -691,7 +684,7 @@ static void fuse_copy_finish(struct fuse_copy_state *cs) */ static int fuse_copy_fill(struct fuse_copy_state *cs) { - unsigned long offset; + struct page *page; int err; unlock_request(cs->fc, cs->req); @@ -706,14 +699,12 @@ static int fuse_copy_fill(struct fuse_copy_state *cs) BUG_ON(!cs->nr_segs); cs->currbuf = buf; - cs->mapaddr = kmap_atomic(buf->page); + cs->pg = buf->page; + cs->offset = buf->offset; cs->len = buf->len; - cs->buf = cs->mapaddr + buf->offset; cs->pipebufs++; cs->nr_segs--; } else { - struct page *page; - if (cs->nr_segs == cs->pipe->buffers) return -EIO; @@ -726,8 +717,8 @@ static int fuse_copy_fill(struct fuse_copy_state *cs) buf->len = 0; cs->currbuf = buf; - cs->mapaddr = kmap_atomic(page); - cs->buf = cs->mapaddr; + cs->pg = page; + cs->offset = 0; cs->len = PAGE_SIZE; cs->pipebufs++; cs->nr_segs++; @@ -740,14 +731,13 @@ static int fuse_copy_fill(struct fuse_copy_state *cs) cs->iov++; cs->nr_segs--; } - err = get_user_pages_fast(cs->addr, 1, cs->write, &cs->pg); + err = get_user_pages_fast(cs->addr, 1, cs->write, &page); if (err < 0) return err; BUG_ON(err != 1); - offset = cs->addr % PAGE_SIZE; - cs->mapaddr = kmap_atomic(cs->pg); - cs->buf = cs->mapaddr + offset; - cs->len = min(PAGE_SIZE - offset, cs->seglen); + cs->pg = page; + cs->offset = cs->addr % PAGE_SIZE; + cs->len = min(PAGE_SIZE - cs->offset, cs->seglen); cs->seglen -= cs->len; cs->addr += cs->len; } @@ -760,15 +750,20 @@ static int fuse_copy_do(struct fuse_copy_state *cs, void **val, unsigned *size) { unsigned ncpy = min(*size, cs->len); if (val) { + void *pgaddr = kmap_atomic(cs->pg); + void *buf = pgaddr + cs->offset; + if (cs->write) - memcpy(cs->buf, *val, ncpy); + memcpy(buf, *val, ncpy); else - memcpy(*val, cs->buf, ncpy); + memcpy(*val, buf, ncpy); + + kunmap_atomic(pgaddr); *val += ncpy; } *size -= ncpy; cs->len -= ncpy; - cs->buf += ncpy; + cs->offset += ncpy; return ncpy; } @@ -874,8 +869,8 @@ static int fuse_try_move_page(struct fuse_copy_state *cs, struct page **pagep) out_fallback_unlock: unlock_page(newpage); out_fallback: - cs->mapaddr = kmap_atomic(buf->page); - cs->buf = cs->mapaddr + buf->offset; + cs->pg = buf->page; + cs->offset = buf->offset; err = lock_request(cs->fc, cs->req); if (err) From 6c642e442e99af1ca026af55a16f23b5f8ee612a Mon Sep 17 00:00:00 2001 From: zhangdianfang Date: Fri, 30 May 2014 08:37:28 +0800 Subject: [PATCH 088/268] tools/liblockdep: Fix comparison of a boolean value with a value of 2 Comparison of a boolean value (!__init_state) with a value of 2 (done) as currently happens in the code is unlikely to succeed and causes repeated initialization of the pthread function pointers. Instead, remove boolean comparison so that we would initialize said function pointers only once. Ref: https://bugzilla.kernel.org/show_bug.cgi?id=76741 Cc: Jean Delvare Reported-by: David Binderman Signed-off-by: Dianfang Zhang Signed-off-by: Sasha Levin --- tools/lib/lockdep/preload.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/lib/lockdep/preload.c b/tools/lib/lockdep/preload.c index 23bd69cb5ade7..b5e52af6ddf37 100644 --- a/tools/lib/lockdep/preload.c +++ b/tools/lib/lockdep/preload.c @@ -92,7 +92,7 @@ enum { none, prepare, done, } __init_state; static void init_preload(void); static void try_init_preload(void) { - if (!__init_state != done) + if (__init_state != done) init_preload(); } From 0c37c686b336aeead2f0b982f24eafaeb19435b1 Mon Sep 17 00:00:00 2001 From: Sasha Levin Date: Mon, 7 Jul 2014 12:14:27 -0400 Subject: [PATCH 089/268] tools/liblockdep: Remove debug print left over from development Remove a debug print in init_preload() which was left over from development and isn't usefull at all currently. It was also causing false positive test results. Reported-by: S. Lockwood-Childs Signed-off-by: Sasha Levin --- tools/lib/lockdep/preload.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/tools/lib/lockdep/preload.c b/tools/lib/lockdep/preload.c index b5e52af6ddf37..51ec2ec8371be 100644 --- a/tools/lib/lockdep/preload.c +++ b/tools/lib/lockdep/preload.c @@ -439,8 +439,6 @@ __attribute__((constructor)) static void init_preload(void) ll_pthread_rwlock_unlock = dlsym(RTLD_NEXT, "pthread_rwlock_unlock"); #endif - printf("%p\n", ll_pthread_mutex_trylock);fflush(stdout); - lockdep_init(); __init_state = done; From b10827814e9c81c5a14fb73c5a6e06bd85df3f94 Mon Sep 17 00:00:00 2001 From: "S. Lockwood-Childs" Date: Mon, 7 Jul 2014 00:17:33 -0700 Subject: [PATCH 090/268] tools/liblockdep: Account for bitfield changes in lockdeps lock_acquire Commit fb9edbe984 shortened held_lock->check from a 2-bit field to a 1-bit field. Make liblockdep compatible with the new definition by passing check=1 to lock_acquire() calls, rather than the old value check=2 (which inadvertently disabled checks by overflowing to 0). Without this fix, several of the test cases in liblockdep run_tests.sh were failing. Signed-off-by: S. Lockwood-Childs Signed-off-by: Sasha Levin --- tools/lib/lockdep/include/liblockdep/mutex.h | 4 ++-- tools/lib/lockdep/include/liblockdep/rwlock.h | 8 ++++---- tools/lib/lockdep/preload.c | 16 ++++++++-------- 3 files changed, 14 insertions(+), 14 deletions(-) diff --git a/tools/lib/lockdep/include/liblockdep/mutex.h b/tools/lib/lockdep/include/liblockdep/mutex.h index c342f7087147e..ee53a42818cae 100644 --- a/tools/lib/lockdep/include/liblockdep/mutex.h +++ b/tools/lib/lockdep/include/liblockdep/mutex.h @@ -35,7 +35,7 @@ static inline int __mutex_init(liblockdep_pthread_mutex_t *lock, static inline int liblockdep_pthread_mutex_lock(liblockdep_pthread_mutex_t *lock) { - lock_acquire(&lock->dep_map, 0, 0, 0, 2, NULL, (unsigned long)_RET_IP_); + lock_acquire(&lock->dep_map, 0, 0, 0, 1, NULL, (unsigned long)_RET_IP_); return pthread_mutex_lock(&lock->mutex); } @@ -47,7 +47,7 @@ static inline int liblockdep_pthread_mutex_unlock(liblockdep_pthread_mutex_t *lo static inline int liblockdep_pthread_mutex_trylock(liblockdep_pthread_mutex_t *lock) { - lock_acquire(&lock->dep_map, 0, 1, 0, 2, NULL, (unsigned long)_RET_IP_); + lock_acquire(&lock->dep_map, 0, 1, 0, 1, NULL, (unsigned long)_RET_IP_); return pthread_mutex_trylock(&lock->mutex) == 0 ? 1 : 0; } diff --git a/tools/lib/lockdep/include/liblockdep/rwlock.h b/tools/lib/lockdep/include/liblockdep/rwlock.h index a680ab8c2e364..4ec03f8615516 100644 --- a/tools/lib/lockdep/include/liblockdep/rwlock.h +++ b/tools/lib/lockdep/include/liblockdep/rwlock.h @@ -36,7 +36,7 @@ static inline int __rwlock_init(liblockdep_pthread_rwlock_t *lock, static inline int liblockdep_pthread_rwlock_rdlock(liblockdep_pthread_rwlock_t *lock) { - lock_acquire(&lock->dep_map, 0, 0, 2, 2, NULL, (unsigned long)_RET_IP_); + lock_acquire(&lock->dep_map, 0, 0, 2, 1, NULL, (unsigned long)_RET_IP_); return pthread_rwlock_rdlock(&lock->rwlock); } @@ -49,19 +49,19 @@ static inline int liblockdep_pthread_rwlock_unlock(liblockdep_pthread_rwlock_t * static inline int liblockdep_pthread_rwlock_wrlock(liblockdep_pthread_rwlock_t *lock) { - lock_acquire(&lock->dep_map, 0, 0, 0, 2, NULL, (unsigned long)_RET_IP_); + lock_acquire(&lock->dep_map, 0, 0, 0, 1, NULL, (unsigned long)_RET_IP_); return pthread_rwlock_wrlock(&lock->rwlock); } static inline int liblockdep_pthread_rwlock_tryrdlock(liblockdep_pthread_rwlock_t *lock) { - lock_acquire(&lock->dep_map, 0, 1, 2, 2, NULL, (unsigned long)_RET_IP_); + lock_acquire(&lock->dep_map, 0, 1, 2, 1, NULL, (unsigned long)_RET_IP_); return pthread_rwlock_tryrdlock(&lock->rwlock) == 0 ? 1 : 0; } static inline int liblockdep_pthread_rwlock_trywlock(liblockdep_pthread_rwlock_t *lock) { - lock_acquire(&lock->dep_map, 0, 1, 0, 2, NULL, (unsigned long)_RET_IP_); + lock_acquire(&lock->dep_map, 0, 1, 0, 1, NULL, (unsigned long)_RET_IP_); return pthread_rwlock_trywlock(&lock->rwlock) == 0 ? 1 : 0; } diff --git a/tools/lib/lockdep/preload.c b/tools/lib/lockdep/preload.c index 51ec2ec8371be..6f803609e4982 100644 --- a/tools/lib/lockdep/preload.c +++ b/tools/lib/lockdep/preload.c @@ -252,7 +252,7 @@ int pthread_mutex_lock(pthread_mutex_t *mutex) try_init_preload(); - lock_acquire(&__get_lock(mutex)->dep_map, 0, 0, 0, 2, NULL, + lock_acquire(&__get_lock(mutex)->dep_map, 0, 0, 0, 1, NULL, (unsigned long)_RET_IP_); /* * Here's the thing with pthread mutexes: unlike the kernel variant, @@ -281,7 +281,7 @@ int pthread_mutex_trylock(pthread_mutex_t *mutex) try_init_preload(); - lock_acquire(&__get_lock(mutex)->dep_map, 0, 1, 0, 2, NULL, (unsigned long)_RET_IP_); + lock_acquire(&__get_lock(mutex)->dep_map, 0, 1, 0, 1, NULL, (unsigned long)_RET_IP_); r = ll_pthread_mutex_trylock(mutex); if (r) lock_release(&__get_lock(mutex)->dep_map, 0, (unsigned long)_RET_IP_); @@ -303,7 +303,7 @@ int pthread_mutex_unlock(pthread_mutex_t *mutex) */ r = ll_pthread_mutex_unlock(mutex); if (r) - lock_acquire(&__get_lock(mutex)->dep_map, 0, 0, 0, 2, NULL, (unsigned long)_RET_IP_); + lock_acquire(&__get_lock(mutex)->dep_map, 0, 0, 0, 1, NULL, (unsigned long)_RET_IP_); return r; } @@ -352,7 +352,7 @@ int pthread_rwlock_rdlock(pthread_rwlock_t *rwlock) init_preload(); - lock_acquire(&__get_lock(rwlock)->dep_map, 0, 0, 2, 2, NULL, (unsigned long)_RET_IP_); + lock_acquire(&__get_lock(rwlock)->dep_map, 0, 0, 2, 1, NULL, (unsigned long)_RET_IP_); r = ll_pthread_rwlock_rdlock(rwlock); if (r) lock_release(&__get_lock(rwlock)->dep_map, 0, (unsigned long)_RET_IP_); @@ -366,7 +366,7 @@ int pthread_rwlock_tryrdlock(pthread_rwlock_t *rwlock) init_preload(); - lock_acquire(&__get_lock(rwlock)->dep_map, 0, 1, 2, 2, NULL, (unsigned long)_RET_IP_); + lock_acquire(&__get_lock(rwlock)->dep_map, 0, 1, 2, 1, NULL, (unsigned long)_RET_IP_); r = ll_pthread_rwlock_tryrdlock(rwlock); if (r) lock_release(&__get_lock(rwlock)->dep_map, 0, (unsigned long)_RET_IP_); @@ -380,7 +380,7 @@ int pthread_rwlock_trywrlock(pthread_rwlock_t *rwlock) init_preload(); - lock_acquire(&__get_lock(rwlock)->dep_map, 0, 1, 0, 2, NULL, (unsigned long)_RET_IP_); + lock_acquire(&__get_lock(rwlock)->dep_map, 0, 1, 0, 1, NULL, (unsigned long)_RET_IP_); r = ll_pthread_rwlock_trywrlock(rwlock); if (r) lock_release(&__get_lock(rwlock)->dep_map, 0, (unsigned long)_RET_IP_); @@ -394,7 +394,7 @@ int pthread_rwlock_wrlock(pthread_rwlock_t *rwlock) init_preload(); - lock_acquire(&__get_lock(rwlock)->dep_map, 0, 0, 0, 2, NULL, (unsigned long)_RET_IP_); + lock_acquire(&__get_lock(rwlock)->dep_map, 0, 0, 0, 1, NULL, (unsigned long)_RET_IP_); r = ll_pthread_rwlock_wrlock(rwlock); if (r) lock_release(&__get_lock(rwlock)->dep_map, 0, (unsigned long)_RET_IP_); @@ -411,7 +411,7 @@ int pthread_rwlock_unlock(pthread_rwlock_t *rwlock) lock_release(&__get_lock(rwlock)->dep_map, 0, (unsigned long)_RET_IP_); r = ll_pthread_rwlock_unlock(rwlock); if (r) - lock_acquire(&__get_lock(rwlock)->dep_map, 0, 0, 0, 2, NULL, (unsigned long)_RET_IP_); + lock_acquire(&__get_lock(rwlock)->dep_map, 0, 0, 0, 1, NULL, (unsigned long)_RET_IP_); return r; } From 2c4db12ec469b9fcdad9f6bfd6fa20e65a563ac5 Mon Sep 17 00:00:00 2001 From: Andrea Merello Date: Sat, 5 Jul 2014 21:07:17 +0200 Subject: [PATCH 091/268] rt2800usb: Don't perform DMA from stack Function rt2800usb_autorun_detect() passes the address of a variable allocated onto the stack to be used for DMA by the USB layer. This has been caught by my debugging-enabled kernel. This patch change things in order to allocate that variable via kmalloc, and it adjusts things to handle the kmalloc failure case, propagating the error. [ 7363.238852] ------------[ cut here ]------------ [ 7363.243529] WARNING: CPU: 1 PID: 5235 at lib/dma-debug.c:1153 check_for_stack+0xa4/0xf0() [ 7363.251759] ehci-pci 0000:00:04.1: DMA-API: device driver maps memory fromstack [addr=ffff88006b81bad4] [ 7363.261210] Modules linked in: rt2800usb(O+) rt2800lib(O) rt2x00usb(O) rt2x00lib(O) rtl818x_pci(O) rtl8187 led_class eeprom_93cx6 mac80211 cfg80211 [last unloaded: rt2x00lib] [ 7363.277143] CPU: 1 PID: 5235 Comm: systemd-udevd Tainted: G O 3.16.0-rc3-wl+ #31 [ 7363.285546] Hardware name: System manufacturer System Product Name/M3N78 PRO, BIOS ASUS M3N78 PRO ACPI BIOS Revision 1402 12/04/2009 [ 7363.297511] 0000000000000009 ffff88006b81b710 ffffffff8175dcad ffff88006b81b758 [ 7363.305062] ffff88006b81b748 ffffffff8106d372 ffff88006cf10098 ffff88006cead6a0 [ 7363.312622] ffff88006b81bad4 ffffffff81c1e7c0 ffff88006cf10098 ffff88006b81b7a8 [ 7363.320161] Call Trace: [ 7363.322661] [] dump_stack+0x4d/0x6f [ 7363.327847] [] warn_slowpath_common+0x82/0xb0 [ 7363.333893] [] warn_slowpath_fmt+0x47/0x50 [ 7363.339686] [] check_for_stack+0xa4/0xf0 [ 7363.345298] [] debug_dma_map_page+0x10c/0x150 [ 7363.351367] [] usb_hcd_map_urb_for_dma+0x229/0x720 [ 7363.357890] [] usb_hcd_submit_urb+0x2fd/0x930 [ 7363.363929] [] ? irq_work_queue+0x71/0xd0 [ 7363.369617] [] ? wake_up_klogd+0x37/0x50 [ 7363.375219] [] ? console_unlock+0x1e5/0x420 [ 7363.381081] [] ? vprintk_emit+0x245/0x530 [ 7363.386773] [] usb_submit_urb+0x30c/0x580 [ 7363.392462] [] usb_start_wait_urb+0x65/0xf0 [ 7363.398325] [] usb_control_msg+0xcd/0x110 [ 7363.404014] [] rt2x00usb_vendor_request+0xbd/0x170 [rt2x00usb] [ 7363.411544] [] rt2800usb_autorun_detect+0x32/0x50 [rt2800usb] [ 7363.418986] [] rt2800usb_read_eeprom+0x11/0x70 [rt2800usb] [ 7363.426168] [] rt2800_probe_hw+0x11d/0xf90 [rt2800lib] [ 7363.432989] [] rt2800usb_probe_hw+0xd/0x50 [rt2800usb] [ 7363.439808] [] rt2x00lib_probe_dev+0x238/0x7c0 [rt2x00lib] [ 7363.446992] [] ? ieee80211_led_names+0xb8/0x100 [mac80211] [ 7363.454156] [] rt2x00usb_probe+0x156/0x1f0 [rt2x00usb] [ 7363.460971] [] rt2800usb_probe+0x10/0x20 [rt2800usb] [ 7363.467616] [] usb_probe_interface+0xce/0x1c0 [ 7363.473651] [] really_probe+0x70/0x240 [ 7363.479079] [] __driver_attach+0xa1/0xb0 [ 7363.484682] [] ? __device_attach+0x70/0x70 [ 7363.490461] [] bus_for_each_dev+0x63/0xa0 [ 7363.496146] [] driver_attach+0x19/0x20 [ 7363.501570] [] bus_add_driver+0x178/0x220 [ 7363.507270] [] driver_register+0x5b/0xe0 [ 7363.512874] [] usb_register_driver+0xa0/0x170 [ 7363.518905] [] ? 0xffffffffa0079fff [ 7363.524074] [] rt2800usb_driver_init+0x1e/0x20 [rt2800usb] [ 7363.531247] [] do_one_initcall+0x84/0x1b0 [ 7363.536932] [] ? kfree+0xd0/0x110 [ 7363.541931] [] ? __vunmap+0xaa/0xf0 [ 7363.547538] [] load_module+0x1aee/0x2040 [ 7363.553141] [] ? store_uevent+0x50/0x50 [ 7363.558676] [] SyS_init_module+0x9e/0xc0 [ 7363.564285] [] system_call_fastpath+0x16/0x1b [ 7363.570338] ---[ end trace 01ef5f822bea9882 ]--- Signed-off-by: Andrea Merello Acked-by: Helmut Schaa Signed-off-by: John W. Linville --- drivers/net/wireless/rt2x00/rt2800usb.c | 28 +++++++++++++++++++------ 1 file changed, 22 insertions(+), 6 deletions(-) diff --git a/drivers/net/wireless/rt2x00/rt2800usb.c b/drivers/net/wireless/rt2x00/rt2800usb.c index e11dab2216c6f..832006b5aab15 100644 --- a/drivers/net/wireless/rt2x00/rt2800usb.c +++ b/drivers/net/wireless/rt2x00/rt2800usb.c @@ -231,9 +231,12 @@ static enum hrtimer_restart rt2800usb_tx_sta_fifo_timeout(struct hrtimer *timer) */ static int rt2800usb_autorun_detect(struct rt2x00_dev *rt2x00dev) { - __le32 reg; + __le32 *reg; u32 fw_mode; + reg = kmalloc(sizeof(*reg), GFP_KERNEL); + if (reg == NULL) + return -ENOMEM; /* cannot use rt2x00usb_register_read here as it uses different * mode (MULTI_READ vs. DEVICE_MODE) and does not pass the * magic value USB_MODE_AUTORUN (0x11) to the device, thus the @@ -241,8 +244,9 @@ static int rt2800usb_autorun_detect(struct rt2x00_dev *rt2x00dev) */ rt2x00usb_vendor_request(rt2x00dev, USB_DEVICE_MODE, USB_VENDOR_REQUEST_IN, 0, USB_MODE_AUTORUN, - ®, sizeof(reg), REGISTER_TIMEOUT_FIRMWARE); - fw_mode = le32_to_cpu(reg); + reg, sizeof(*reg), REGISTER_TIMEOUT_FIRMWARE); + fw_mode = le32_to_cpu(*reg); + kfree(reg); if ((fw_mode & 0x00000003) == 2) return 1; @@ -261,6 +265,7 @@ static int rt2800usb_write_firmware(struct rt2x00_dev *rt2x00dev, int status; u32 offset; u32 length; + int retval; /* * Check which section of the firmware we need. @@ -278,7 +283,10 @@ static int rt2800usb_write_firmware(struct rt2x00_dev *rt2x00dev, /* * Write firmware to device. */ - if (rt2800usb_autorun_detect(rt2x00dev)) { + retval = rt2800usb_autorun_detect(rt2x00dev); + if (retval < 0) + return retval; + if (retval) { rt2x00_info(rt2x00dev, "Firmware loading not required - NIC in AutoRun mode\n"); } else { @@ -763,7 +771,12 @@ static void rt2800usb_fill_rxdone(struct queue_entry *entry, */ static int rt2800usb_efuse_detect(struct rt2x00_dev *rt2x00dev) { - if (rt2800usb_autorun_detect(rt2x00dev)) + int retval; + + retval = rt2800usb_autorun_detect(rt2x00dev); + if (retval < 0) + return retval; + if (retval) return 1; return rt2800_efuse_detect(rt2x00dev); } @@ -772,7 +785,10 @@ static int rt2800usb_read_eeprom(struct rt2x00_dev *rt2x00dev) { int retval; - if (rt2800usb_efuse_detect(rt2x00dev)) + retval = rt2800usb_efuse_detect(rt2x00dev); + if (retval < 0) + return retval; + if (retval) retval = rt2800_read_eeprom_efuse(rt2x00dev); else retval = rt2x00usb_eeprom_read(rt2x00dev, rt2x00dev->eeprom, From 19a44ecff52fd67d77d49fb4d43b289c53cdc392 Mon Sep 17 00:00:00 2001 From: Alexander Graf Date: Mon, 7 Jul 2014 21:05:33 +0200 Subject: [PATCH 092/268] KVM: PPC: RTAS: Do byte swaps explicitly In commit b59d9d26b we introduced implicit byte swaps for RTAS calls. Unfortunately we messed up and didn't swizzle return values properly. Also the old approach wasn't "sparse" compatible - we were randomly reading __be32 values on an LE system. Let's just do all of the swizzling explicitly with byte swaps right where values get used. That way we can at least catch bugs using sparse. This patch fixes XICS RTAS emulation on little endian hosts for me. Signed-off-by: Alexander Graf --- arch/powerpc/kvm/book3s_rtas.c | 65 ++++++++++------------------------ 1 file changed, 18 insertions(+), 47 deletions(-) diff --git a/arch/powerpc/kvm/book3s_rtas.c b/arch/powerpc/kvm/book3s_rtas.c index edb14ba992b34..ef27fbd5d9c54 100644 --- a/arch/powerpc/kvm/book3s_rtas.c +++ b/arch/powerpc/kvm/book3s_rtas.c @@ -23,20 +23,20 @@ static void kvm_rtas_set_xive(struct kvm_vcpu *vcpu, struct rtas_args *args) u32 irq, server, priority; int rc; - if (args->nargs != 3 || args->nret != 1) { + if (be32_to_cpu(args->nargs) != 3 || be32_to_cpu(args->nret) != 1) { rc = -3; goto out; } - irq = args->args[0]; - server = args->args[1]; - priority = args->args[2]; + irq = be32_to_cpu(args->args[0]); + server = be32_to_cpu(args->args[1]); + priority = be32_to_cpu(args->args[2]); rc = kvmppc_xics_set_xive(vcpu->kvm, irq, server, priority); if (rc) rc = -3; out: - args->rets[0] = rc; + args->rets[0] = cpu_to_be32(rc); } static void kvm_rtas_get_xive(struct kvm_vcpu *vcpu, struct rtas_args *args) @@ -44,12 +44,12 @@ static void kvm_rtas_get_xive(struct kvm_vcpu *vcpu, struct rtas_args *args) u32 irq, server, priority; int rc; - if (args->nargs != 1 || args->nret != 3) { + if (be32_to_cpu(args->nargs) != 1 || be32_to_cpu(args->nret) != 3) { rc = -3; goto out; } - irq = args->args[0]; + irq = be32_to_cpu(args->args[0]); server = priority = 0; rc = kvmppc_xics_get_xive(vcpu->kvm, irq, &server, &priority); @@ -58,10 +58,10 @@ static void kvm_rtas_get_xive(struct kvm_vcpu *vcpu, struct rtas_args *args) goto out; } - args->rets[1] = server; - args->rets[2] = priority; + args->rets[1] = cpu_to_be32(server); + args->rets[2] = cpu_to_be32(priority); out: - args->rets[0] = rc; + args->rets[0] = cpu_to_be32(rc); } static void kvm_rtas_int_off(struct kvm_vcpu *vcpu, struct rtas_args *args) @@ -69,18 +69,18 @@ static void kvm_rtas_int_off(struct kvm_vcpu *vcpu, struct rtas_args *args) u32 irq; int rc; - if (args->nargs != 1 || args->nret != 1) { + if (be32_to_cpu(args->nargs) != 1 || be32_to_cpu(args->nret) != 1) { rc = -3; goto out; } - irq = args->args[0]; + irq = be32_to_cpu(args->args[0]); rc = kvmppc_xics_int_off(vcpu->kvm, irq); if (rc) rc = -3; out: - args->rets[0] = rc; + args->rets[0] = cpu_to_be32(rc); } static void kvm_rtas_int_on(struct kvm_vcpu *vcpu, struct rtas_args *args) @@ -88,18 +88,18 @@ static void kvm_rtas_int_on(struct kvm_vcpu *vcpu, struct rtas_args *args) u32 irq; int rc; - if (args->nargs != 1 || args->nret != 1) { + if (be32_to_cpu(args->nargs) != 1 || be32_to_cpu(args->nret) != 1) { rc = -3; goto out; } - irq = args->args[0]; + irq = be32_to_cpu(args->args[0]); rc = kvmppc_xics_int_on(vcpu->kvm, irq); if (rc) rc = -3; out: - args->rets[0] = rc; + args->rets[0] = cpu_to_be32(rc); } #endif /* CONFIG_KVM_XICS */ @@ -205,32 +205,6 @@ int kvm_vm_ioctl_rtas_define_token(struct kvm *kvm, void __user *argp) return rc; } -static void kvmppc_rtas_swap_endian_in(struct rtas_args *args) -{ -#ifdef __LITTLE_ENDIAN__ - int i; - - args->token = be32_to_cpu(args->token); - args->nargs = be32_to_cpu(args->nargs); - args->nret = be32_to_cpu(args->nret); - for (i = 0; i < args->nargs; i++) - args->args[i] = be32_to_cpu(args->args[i]); -#endif -} - -static void kvmppc_rtas_swap_endian_out(struct rtas_args *args) -{ -#ifdef __LITTLE_ENDIAN__ - int i; - - for (i = 0; i < args->nret; i++) - args->args[i] = cpu_to_be32(args->args[i]); - args->token = cpu_to_be32(args->token); - args->nargs = cpu_to_be32(args->nargs); - args->nret = cpu_to_be32(args->nret); -#endif -} - int kvmppc_rtas_hcall(struct kvm_vcpu *vcpu) { struct rtas_token_definition *d; @@ -249,8 +223,6 @@ int kvmppc_rtas_hcall(struct kvm_vcpu *vcpu) if (rc) goto fail; - kvmppc_rtas_swap_endian_in(&args); - /* * args->rets is a pointer into args->args. Now that we've * copied args we need to fix it up to point into our copy, @@ -258,13 +230,13 @@ int kvmppc_rtas_hcall(struct kvm_vcpu *vcpu) * value so we can restore it on the way out. */ orig_rets = args.rets; - args.rets = &args.args[args.nargs]; + args.rets = &args.args[be32_to_cpu(args.nargs)]; mutex_lock(&vcpu->kvm->lock); rc = -ENOENT; list_for_each_entry(d, &vcpu->kvm->arch.rtas_tokens, list) { - if (d->token == args.token) { + if (d->token == be32_to_cpu(args.token)) { d->handler->handler(vcpu, &args); rc = 0; break; @@ -275,7 +247,6 @@ int kvmppc_rtas_hcall(struct kvm_vcpu *vcpu) if (rc == 0) { args.rets = orig_rets; - kvmppc_rtas_swap_endian_out(&args); rc = kvm_write_guest(vcpu->kvm, args_phys, &args, sizeof(args)); if (rc) goto fail; From 68b7107b62983f2cff0948292429d5f5999df096 Mon Sep 17 00:00:00 2001 From: Edward Allcutt Date: Mon, 30 Jun 2014 16:16:02 +0100 Subject: [PATCH 093/268] ipv4: icmp: Fix pMTU handling for rare case Some older router implementations still send Fragmentation Needed errors with the Next-Hop MTU field set to zero. This is explicitly described as an eventuality that hosts must deal with by the standard (RFC 1191) since older standards specified that those bits must be zero. Linux had a generic (for all of IPv4) implementation of the algorithm described in the RFC for searching a list of MTU plateaus for a good value. Commit 46517008e116 ("ipv4: Kill ip_rt_frag_needed().") removed this as part of the changes to remove the routing cache. Subsequently any Fragmentation Needed packet with a zero Next-Hop MTU has been discarded without being passed to the per-protocol handlers or notifying userspace for raw sockets. When there is a router which does not implement RFC 1191 on an MTU limited path then this results in stalled connections since large packets are discarded and the local protocols are not notified so they never attempt to lower the pMTU. One example I have seen is an OpenBSD router terminating IPSec tunnels. It's worth pointing out that this case is distinct from the BSD 4.2 bug which incorrectly calculated the Next-Hop MTU since the commit in question dismissed that as a valid concern. All of the per-protocols handlers implement the simple approach from RFC 1191 of immediately falling back to the minimum value. Although this is sub-optimal it is vastly preferable to connections hanging indefinitely. Remove the Next-Hop MTU != 0 check and allow such packets to follow the normal path. Fixes: 46517008e116 ("ipv4: Kill ip_rt_frag_needed().") Signed-off-by: Edward Allcutt Signed-off-by: David S. Miller --- net/ipv4/icmp.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c index 79c3d947a4812..42b7bcf8045be 100644 --- a/net/ipv4/icmp.c +++ b/net/ipv4/icmp.c @@ -739,8 +739,6 @@ static void icmp_unreach(struct sk_buff *skb) /* fall through */ case 0: info = ntohs(icmph->un.frag.mtu); - if (!info) - goto out; } break; case ICMP_SR_FAILED: From 11ef7a8996d5d433c9cd75d80651297eccbf6d42 Mon Sep 17 00:00:00 2001 From: Tom Herbert Date: Mon, 30 Jun 2014 09:50:40 -0700 Subject: [PATCH 094/268] net: Performance fix for process_backlog In process_backlog the input_pkt_queue is only checked once for new packets and quota is artificially reduced to reflect precisely the number of packets on the input_pkt_queue so that the loop exits appropriately. This patches changes the behavior to be more straightforward and less convoluted. Packets are processed until either the quota is met or there are no more packets to process. This patch seems to provide a small, but noticeable performance improvement. The performance improvement is a result of staying in the process_backlog loop longer which can reduce number of IPI's. Performance data using super_netperf TCP_RR with 200 flows: Before fix: 88.06% CPU utilization 125/190/309 90/95/99% latencies 1.46808e+06 tps 1145382 intrs.sec. With fix: 87.73% CPU utilization 122/183/296 90/95/99% latencies 1.4921e+06 tps 1021674.30 intrs./sec. Signed-off-by: Tom Herbert Acked-by: Eric Dumazet Signed-off-by: David S. Miller --- net/core/dev.c | 21 ++++++++++----------- 1 file changed, 10 insertions(+), 11 deletions(-) diff --git a/net/core/dev.c b/net/core/dev.c index 30eedf6779138..77c19c7bb490b 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -4227,9 +4227,8 @@ static int process_backlog(struct napi_struct *napi, int quota) #endif napi->weight = weight_p; local_irq_disable(); - while (work < quota) { + while (1) { struct sk_buff *skb; - unsigned int qlen; while ((skb = __skb_dequeue(&sd->process_queue))) { local_irq_enable(); @@ -4243,24 +4242,24 @@ static int process_backlog(struct napi_struct *napi, int quota) } rps_lock(sd); - qlen = skb_queue_len(&sd->input_pkt_queue); - if (qlen) - skb_queue_splice_tail_init(&sd->input_pkt_queue, - &sd->process_queue); - - if (qlen < quota - work) { + if (skb_queue_empty(&sd->input_pkt_queue)) { /* * Inline a custom version of __napi_complete(). * only current cpu owns and manipulates this napi, - * and NAPI_STATE_SCHED is the only possible flag set on backlog. - * we can use a plain write instead of clear_bit(), + * and NAPI_STATE_SCHED is the only possible flag set + * on backlog. + * We can use a plain write instead of clear_bit(), * and we dont need an smp_mb() memory barrier. */ list_del(&napi->poll_list); napi->state = 0; + rps_unlock(sd); - quota = work + qlen; + break; } + + skb_queue_splice_tail_init(&sd->input_pkt_queue, + &sd->process_queue); rps_unlock(sd); } local_irq_enable(); From 8844a00626c78c30e8cd757f2a048038f4983c53 Mon Sep 17 00:00:00 2001 From: Zhao Qiang Date: Tue, 1 Jul 2014 09:21:34 +0800 Subject: [PATCH 095/268] powerpc/ucc_geth: deal with a compile warning deal with a compile warning: comparison between 'enum qe_fltr_largest_external_tbl_lookup_key_size' and 'enum qe_fltr_tbl_lookup_key_size' the code: "if (ug_info->largestexternallookupkeysize == QE_FLTR_TABLE_LOOKUP_KEY_SIZE_8_BYTES)" is warned because different enum, so modify it. "enum qe_fltr_largest_external_tbl_lookup_key_size largestexternallookupkeysize; enum qe_fltr_tbl_lookup_key_size { QE_FLTR_TABLE_LOOKUP_KEY_SIZE_8_BYTES = 0x3f, /* LookupKey parsed by the Generate LookupKey CMD is truncated to 8 bytes */ QE_FLTR_TABLE_LOOKUP_KEY_SIZE_16_BYTES = 0x5f, /* LookupKey parsed by the Generate LookupKey CMD is truncated to 16 bytes */ }; /* QE FLTR extended filtering Largest External Table Lookup Key Size */ enum qe_fltr_largest_external_tbl_lookup_key_size { QE_FLTR_LARGEST_EXTERNAL_TABLE_LOOKUP_KEY_SIZE_NONE = 0x0,/* not used */ QE_FLTR_LARGEST_EXTERNAL_TABLE_LOOKUP_KEY_SIZE_8_BYTES = QE_FLTR_TABLE_LOOKUP_KEY_SIZE_8_BYTES, /* 8 bytes */ QE_FLTR_LARGEST_EXTERNAL_TABLE_LOOKUP_KEY_SIZE_16_BYTES = QE_FLTR_TABLE_LOOKUP_KEY_SIZE_16_BYTES, /* 16 bytes */ };" Signed-off-by: Zhao Qiang Signed-off-by: David S. Miller --- drivers/net/ethernet/freescale/ucc_geth.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/freescale/ucc_geth.c b/drivers/net/ethernet/freescale/ucc_geth.c index fab39e2954410..36fc429298e35 100644 --- a/drivers/net/ethernet/freescale/ucc_geth.c +++ b/drivers/net/ethernet/freescale/ucc_geth.c @@ -2990,11 +2990,11 @@ static int ucc_geth_startup(struct ucc_geth_private *ugeth) if (ug_info->rxExtendedFiltering) { size += THREAD_RX_PRAM_ADDITIONAL_FOR_EXTENDED_FILTERING; if (ug_info->largestexternallookupkeysize == - QE_FLTR_TABLE_LOOKUP_KEY_SIZE_8_BYTES) + QE_FLTR_LARGEST_EXTERNAL_TABLE_LOOKUP_KEY_SIZE_8_BYTES) size += THREAD_RX_PRAM_ADDITIONAL_FOR_EXTENDED_FILTERING_8; if (ug_info->largestexternallookupkeysize == - QE_FLTR_TABLE_LOOKUP_KEY_SIZE_16_BYTES) + QE_FLTR_LARGEST_EXTERNAL_TABLE_LOOKUP_KEY_SIZE_16_BYTES) size += THREAD_RX_PRAM_ADDITIONAL_FOR_EXTENDED_FILTERING_16; } From 26a9ebca982d52b96c921f7344b7c01b1e2d9672 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Mon, 7 Jul 2014 19:53:45 -0700 Subject: [PATCH 096/268] Revert "net: stmmac: add platform init/exit for Altera's ARM socfpga" This reverts commit 0acf16768740776feffac506ce93b1c06c059ac6. Breaks the build due to missing reference to phy_resume in the resulting dwmac-socfpga.o object. Signed-off-by: David S. Miller --- .../ethernet/stmicro/stmmac/dwmac-socfpga.c | 69 ------------------- .../net/ethernet/stmicro/stmmac/stmmac_main.c | 4 -- 2 files changed, 73 deletions(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-socfpga.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-socfpga.c index ec632e666c56a..fd8a217556a14 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-socfpga.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-socfpga.c @@ -20,9 +20,7 @@ #include #include #include -#include #include -#include "stmmac.h" #define SYSMGR_EMACGRP_CTRL_PHYSEL_ENUM_GMII_MII 0x0 #define SYSMGR_EMACGRP_CTRL_PHYSEL_ENUM_RGMII 0x1 @@ -36,7 +34,6 @@ struct socfpga_dwmac { u32 reg_shift; struct device *dev; struct regmap *sys_mgr_base_addr; - struct reset_control *stmmac_rst; }; static int socfpga_dwmac_parse_data(struct socfpga_dwmac *dwmac, struct device *dev) @@ -46,13 +43,6 @@ static int socfpga_dwmac_parse_data(struct socfpga_dwmac *dwmac, struct device * u32 reg_offset, reg_shift; int ret; - dwmac->stmmac_rst = devm_reset_control_get(dev, - STMMAC_RESOURCE_NAME); - if (IS_ERR(dwmac->stmmac_rst)) { - dev_info(dev, "Could not get reset control!\n"); - return -EINVAL; - } - dwmac->interface = of_get_phy_mode(np); sys_mgr_base_addr = syscon_regmap_lookup_by_phandle(np, "altr,sysmgr-syscon"); @@ -135,65 +125,6 @@ static void *socfpga_dwmac_probe(struct platform_device *pdev) return dwmac; } -static void socfpga_dwmac_exit(struct platform_device *pdev, void *priv) -{ - struct socfpga_dwmac *dwmac = priv; - - /* On socfpga platform exit, assert and hold reset to the - * enet controller - the default state after a hard reset. - */ - if (dwmac->stmmac_rst) - reset_control_assert(dwmac->stmmac_rst); -} - -static int socfpga_dwmac_init(struct platform_device *pdev, void *priv) -{ - struct socfpga_dwmac *dwmac = priv; - struct net_device *ndev = platform_get_drvdata(pdev); - struct stmmac_priv *stpriv = NULL; - int ret = 0; - - if (ndev) - stpriv = netdev_priv(ndev); - - /* Assert reset to the enet controller before changing the phy mode */ - if (dwmac->stmmac_rst) - reset_control_assert(dwmac->stmmac_rst); - - /* Setup the phy mode in the system manager registers according to - * devicetree configuration - */ - ret = socfpga_dwmac_setup(dwmac); - - /* Deassert reset for the phy configuration to be sampled by - * the enet controller, and operation to start in requested mode - */ - if (dwmac->stmmac_rst) - reset_control_deassert(dwmac->stmmac_rst); - - /* Before the enet controller is suspended, the phy is suspended. - * This causes the phy clock to be gated. The enet controller is - * resumed before the phy, so the clock is still gated "off" when - * the enet controller is resumed. This code makes sure the phy - * is "resumed" before reinitializing the enet controller since - * the enet controller depends on an active phy clock to complete - * a DMA reset. A DMA reset will "time out" if executed - * with no phy clock input on the Synopsys enet controller. - * Verified through Synopsys Case #8000711656. - * - * Note that the phy clock is also gated when the phy is isolated. - * Phy "suspend" and "isolate" controls are located in phy basic - * control register 0, and can be modified by the phy driver - * framework. - */ - if (stpriv && stpriv->phydev) - phy_resume(stpriv->phydev); - - return ret; -} - const struct stmmac_of_data socfpga_gmac_data = { .setup = socfpga_dwmac_probe, - .init = socfpga_dwmac_init, - .exit = socfpga_dwmac_exit, }; diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c index 18315f34938bd..057a1208e5947 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c @@ -2878,10 +2878,6 @@ int stmmac_suspend(struct net_device *ndev) clk_disable_unprepare(priv->stmmac_clk); } spin_unlock_irqrestore(&priv->lock, flags); - - priv->oldlink = 0; - priv->speed = 0; - priv->oldduplex = -1; return 0; } From 8dcb4b1526747d8431f9895e153dd478c9d16186 Mon Sep 17 00:00:00 2001 From: Bernd Wachter Date: Tue, 1 Jul 2014 22:01:09 +0300 Subject: [PATCH 097/268] net: qmi_wwan: Add ID for Telewell TW-LTE 4G v2 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit There's a new version of the Telewell 4G modem working with, but not recognized by this driver. Signed-off-by: Bernd Wachter Acked-by: Bjørn Mork Signed-off-by: David S. Miller --- drivers/net/usb/qmi_wwan.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/usb/qmi_wwan.c b/drivers/net/usb/qmi_wwan.c index cf62d7e8329f1..c4638c67f6b9f 100644 --- a/drivers/net/usb/qmi_wwan.c +++ b/drivers/net/usb/qmi_wwan.c @@ -741,6 +741,7 @@ static const struct usb_device_id products[] = { {QMI_FIXED_INTF(0x19d2, 0x1424, 2)}, {QMI_FIXED_INTF(0x19d2, 0x1425, 2)}, {QMI_FIXED_INTF(0x19d2, 0x1426, 2)}, /* ZTE MF91 */ + {QMI_FIXED_INTF(0x19d2, 0x1428, 2)}, /* Telewell TW-LTE 4G v2 */ {QMI_FIXED_INTF(0x19d2, 0x2002, 4)}, /* ZTE (Vodafone) K3765-Z */ {QMI_FIXED_INTF(0x0f3d, 0x68a2, 8)}, /* Sierra Wireless MC7700 */ {QMI_FIXED_INTF(0x114f, 0x68a2, 8)}, /* Sierra Wireless MC7750 */ From 54951194656e4853e441266fd095f880bc0398f3 Mon Sep 17 00:00:00 2001 From: Loic Prylli Date: Tue, 1 Jul 2014 21:39:43 -0700 Subject: [PATCH 098/268] net: Fix NETDEV_CHANGE notifier usage causing spurious arp flush A bug was introduced in NETDEV_CHANGE notifier sequence causing the arp table to be sometimes spuriously cleared (including manual arp entries marked permanent), upon network link carrier changes. The changed argument for the notifier was applied only to a single caller of NETDEV_CHANGE, missing among others netdev_state_change(). So upon net_carrier events induced by the network, which are triggering a call to netdev_state_change(), arp_netdev_event() would decide whether to clear or not arp cache based on random/junk stack values (a kind of read buffer overflow). Fixes: be9efd365328 ("net: pass changed flags along with NETDEV_CHANGE event") Fixes: 6c8b4e3ff81b ("arp: flush arp cache on IFF_NOARP change") Signed-off-by: Loic Prylli Signed-off-by: David S. Miller --- net/core/dev.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/net/core/dev.c b/net/core/dev.c index 77c19c7bb490b..7990984ca3640 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -148,6 +148,9 @@ struct list_head ptype_all __read_mostly; /* Taps */ static struct list_head offload_base __read_mostly; static int netif_rx_internal(struct sk_buff *skb); +static int call_netdevice_notifiers_info(unsigned long val, + struct net_device *dev, + struct netdev_notifier_info *info); /* * The @dev_base_head list is protected by @dev_base_lock and the rtnl @@ -1207,7 +1210,11 @@ EXPORT_SYMBOL(netdev_features_change); void netdev_state_change(struct net_device *dev) { if (dev->flags & IFF_UP) { - call_netdevice_notifiers(NETDEV_CHANGE, dev); + struct netdev_notifier_change_info change_info; + + change_info.flags_changed = 0; + call_netdevice_notifiers_info(NETDEV_CHANGE, dev, + &change_info.info); rtmsg_ifinfo(RTM_NEWLINK, dev, 0, GFP_KERNEL); } } From 52ad353a5344f1f700c5b777175bdfa41d3cd65a Mon Sep 17 00:00:00 2001 From: dingtianhong Date: Wed, 2 Jul 2014 13:50:48 +0800 Subject: [PATCH 099/268] igmp: fix the problem when mc leave group The problem was triggered by these steps: 1) create socket, bind and then setsockopt for add mc group. mreq.imr_multiaddr.s_addr = inet_addr("255.0.0.37"); mreq.imr_interface.s_addr = inet_addr("192.168.1.2"); setsockopt(sockfd, IPPROTO_IP, IP_ADD_MEMBERSHIP, &mreq, sizeof(mreq)); 2) drop the mc group for this socket. mreq.imr_multiaddr.s_addr = inet_addr("255.0.0.37"); mreq.imr_interface.s_addr = inet_addr("0.0.0.0"); setsockopt(sockfd, IPPROTO_IP, IP_DROP_MEMBERSHIP, &mreq, sizeof(mreq)); 3) and then drop the socket, I found the mc group was still used by the dev: netstat -g Interface RefCnt Group --------------- ------ --------------------- eth2 1 255.0.0.37 Normally even though the IP_DROP_MEMBERSHIP return error, the mc group still need to be released for the netdev when drop the socket, but this process was broken when route default is NULL, the reason is that: The ip_mc_leave_group() will choose the in_dev by the imr_interface.s_addr, if input addr is NULL, the default route dev will be chosen, then the ifindex is got from the dev, then polling the inet->mc_list and return -ENODEV, but if the default route dev is NULL, the in_dev and ifIndex is both NULL, when polling the inet->mc_list, the mc group will be released from the mc_list, but the dev didn't dec the refcnt for this mc group, so when dropping the socket, the mc_list is NULL and the dev still keep this group. v1->v2: According Hideaki's suggestion, we should align with IPv6 (RFC3493) and BSDs, so I add the checking for the in_dev before polling the mc_list, make sure when we remove the mc group, dec the refcnt to the real dev which was using the mc address. The problem would never happened again. Signed-off-by: Ding Tianhong Signed-off-by: David S. Miller --- net/ipv4/igmp.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c index 6748d420f714f..db710b059bab3 100644 --- a/net/ipv4/igmp.c +++ b/net/ipv4/igmp.c @@ -1944,6 +1944,10 @@ int ip_mc_leave_group(struct sock *sk, struct ip_mreqn *imr) rtnl_lock(); in_dev = ip_mc_find_dev(net, imr); + if (!in_dev) { + ret = -ENODEV; + goto out; + } ifindex = imr->imr_ifindex; for (imlp = &inet->mc_list; (iml = rtnl_dereference(*imlp)) != NULL; @@ -1961,16 +1965,14 @@ int ip_mc_leave_group(struct sock *sk, struct ip_mreqn *imr) *imlp = iml->next_rcu; - if (in_dev) - ip_mc_dec_group(in_dev, group); + ip_mc_dec_group(in_dev, group); rtnl_unlock(); /* decrease mem now to avoid the memleak warning */ atomic_sub(sizeof(*iml), &sk->sk_omem_alloc); kfree_rcu(iml, rcu); return 0; } - if (!in_dev) - ret = -ENODEV; +out: rtnl_unlock(); return ret; } From e326f2f13b209d56782609e833b87cb497e64b3b Mon Sep 17 00:00:00 2001 From: Or Gerlitz Date: Wed, 2 Jul 2014 17:36:23 +0300 Subject: [PATCH 100/268] net/mlx4_en: Don't configure the HW vxlan parser when vxlan offloading isn't set The add_vxlan_port ndo driver code was wrongly testing whether HW vxlan offloads are supported by the device instead of checking if they are currently enabled. This causes the driver to configure the HW parser to conduct matching for vxlan packets but since no steering rules were set, vxlan packets are dropped on RX. Fix that by doing the right test, as done in the del_vxlan_port ndo handler. Fixes: 1b136de ('net/mlx4: Implement vxlan ndo calls') Signed-off-by: Or Gerlitz Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlx4/en_netdev.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c index 7d4fb7bf25933..e94d96f0ef55f 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c @@ -2336,7 +2336,7 @@ static void mlx4_en_add_vxlan_port(struct net_device *dev, struct mlx4_en_priv *priv = netdev_priv(dev); __be16 current_port; - if (!(priv->mdev->dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_VXLAN_OFFLOADS)) + if (priv->mdev->dev->caps.tunnel_offload_mode != MLX4_TUNNEL_OFFLOAD_MODE_VXLAN) return; if (sa_family == AF_INET6) From 6e08d5e3c8236e7484229e46fdf92006e1dd4c49 Mon Sep 17 00:00:00 2001 From: Yuchung Cheng Date: Wed, 2 Jul 2014 12:07:16 -0700 Subject: [PATCH 101/268] tcp: fix false undo corner cases The undo code assumes that, upon entering loss recovery, TCP 1) always retransmit something 2) the retransmission never fails locally (e.g., qdisc drop) so undo_marker is set in tcp_enter_recovery() and undo_retrans is incremented only when tcp_retransmit_skb() is successful. When the assumption is broken because TCP's cwnd is too small to retransmit or the retransmit fails locally. The next (DUP)ACK would incorrectly revert the cwnd and the congestion state in tcp_try_undo_dsack() or tcp_may_undo(). Subsequent (DUP)ACKs may enter the recovery state. The sender repeatedly enter and (incorrectly) exit recovery states if the retransmits continue to fail locally while receiving (DUP)ACKs. The fix is to initialize undo_retrans to -1 and start counting on the first retransmission. Always increment undo_retrans even if the retransmissions fail locally because they couldn't cause DSACKs to undo the cwnd reduction. Signed-off-by: Yuchung Cheng Signed-off-by: Neal Cardwell Signed-off-by: David S. Miller --- net/ipv4/tcp_input.c | 8 ++++---- net/ipv4/tcp_output.c | 6 ++++-- 2 files changed, 8 insertions(+), 6 deletions(-) diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index b5c23756965ae..40639c288dc22 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -1106,7 +1106,7 @@ static bool tcp_check_dsack(struct sock *sk, const struct sk_buff *ack_skb, } /* D-SACK for already forgotten data... Do dumb counting. */ - if (dup_sack && tp->undo_marker && tp->undo_retrans && + if (dup_sack && tp->undo_marker && tp->undo_retrans > 0 && !after(end_seq_0, prior_snd_una) && after(end_seq_0, tp->undo_marker)) tp->undo_retrans--; @@ -1187,7 +1187,7 @@ static u8 tcp_sacktag_one(struct sock *sk, /* Account D-SACK for retransmitted packet. */ if (dup_sack && (sacked & TCPCB_RETRANS)) { - if (tp->undo_marker && tp->undo_retrans && + if (tp->undo_marker && tp->undo_retrans > 0 && after(end_seq, tp->undo_marker)) tp->undo_retrans--; if (sacked & TCPCB_SACKED_ACKED) @@ -1893,7 +1893,7 @@ static void tcp_clear_retrans_partial(struct tcp_sock *tp) tp->lost_out = 0; tp->undo_marker = 0; - tp->undo_retrans = 0; + tp->undo_retrans = -1; } void tcp_clear_retrans(struct tcp_sock *tp) @@ -2665,7 +2665,7 @@ static void tcp_enter_recovery(struct sock *sk, bool ece_ack) tp->prior_ssthresh = 0; tp->undo_marker = tp->snd_una; - tp->undo_retrans = tp->retrans_out; + tp->undo_retrans = tp->retrans_out ? : -1; if (inet_csk(sk)->icsk_ca_state < TCP_CA_CWR) { if (!ece_ack) diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index d92bce0ea24ec..179b51e6bda33 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -2525,8 +2525,6 @@ int tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb) if (!tp->retrans_stamp) tp->retrans_stamp = TCP_SKB_CB(skb)->when; - tp->undo_retrans += tcp_skb_pcount(skb); - /* snd_nxt is stored to detect loss of retransmitted segment, * see tcp_input.c tcp_sacktag_write_queue(). */ @@ -2534,6 +2532,10 @@ int tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb) } else if (err != -EBUSY) { NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPRETRANSFAIL); } + + if (tp->undo_retrans < 0) + tp->undo_retrans = 0; + tp->undo_retrans += tcp_skb_pcount(skb); return err; } From 9242b5b60df8b13b469bc6b7be08ff6ebb551ad3 Mon Sep 17 00:00:00 2001 From: Bandan Das Date: Tue, 8 Jul 2014 00:30:23 -0400 Subject: [PATCH 102/268] KVM: x86: Check for nested events if there is an injectable interrupt With commit b6b8a1451fc40412c57d1 that introduced vmx_check_nested_events, checks for injectable interrupts happen at different points in time for L1 and L2 that could potentially cause a race. The regression occurs because KVM_REQ_EVENT is always set when nested_run_pending is set even if there's no pending interrupt. Consequently, there could be a small window when check_nested_events returns without exiting to L1, but an interrupt comes through soon after and it incorrectly, gets injected to L2 by inject_pending_event Fix this by adding a call to check for nested events too when a check for injectable interrupt returns true Signed-off-by: Bandan Das Signed-off-by: Paolo Bonzini --- arch/x86/kvm/x86.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index f6449334ec451..ef432f891d30a 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -5887,6 +5887,18 @@ static int inject_pending_event(struct kvm_vcpu *vcpu, bool req_int_win) kvm_x86_ops->set_nmi(vcpu); } } else if (kvm_cpu_has_injectable_intr(vcpu)) { + /* + * Because interrupts can be injected asynchronously, we are + * calling check_nested_events again here to avoid a race condition. + * See https://lkml.org/lkml/2014/7/2/60 for discussion about this + * proposal and current concerns. Perhaps we should be setting + * KVM_REQ_EVENT only on certain events and not unconditionally? + */ + if (is_guest_mode(vcpu) && kvm_x86_ops->check_nested_events) { + r = kvm_x86_ops->check_nested_events(vcpu, req_int_win); + if (r != 0) + return r; + } if (kvm_x86_ops->interrupt_allowed(vcpu)) { kvm_queue_interrupt(vcpu, kvm_cpu_get_interrupt(vcpu), false); From 16927776ae757d0d132bdbfabbfe2c498342bd59 Mon Sep 17 00:00:00 2001 From: John Stultz Date: Mon, 7 Jul 2014 14:06:11 -0700 Subject: [PATCH 103/268] alarmtimer: Fix bug where relative alarm timers were treated as absolute Sharvil noticed with the posix timer_settime interface, using the CLOCK_REALTIME_ALARM or CLOCK_BOOTTIME_ALARM clockid, if the users tried to specify a relative time timer, it would incorrectly be treated as absolute regardless of the state of the flags argument. This patch corrects this, properly checking the absolute/relative flag, as well as adds further error checking that no invalid flag bits are set. Reported-by: Sharvil Nanavati Signed-off-by: John Stultz Cc: Thomas Gleixner Cc: Ingo Molnar Cc: Prarit Bhargava Cc: Sharvil Nanavati Cc: stable #3.0+ Link: http://lkml.kernel.org/r/1404767171-6902-1-git-send-email-john.stultz@linaro.org Signed-off-by: Thomas Gleixner --- kernel/time/alarmtimer.c | 20 ++++++++++++++++++-- 1 file changed, 18 insertions(+), 2 deletions(-) diff --git a/kernel/time/alarmtimer.c b/kernel/time/alarmtimer.c index 88c9c65a430da..fe75444ae7ec3 100644 --- a/kernel/time/alarmtimer.c +++ b/kernel/time/alarmtimer.c @@ -585,9 +585,14 @@ static int alarm_timer_set(struct k_itimer *timr, int flags, struct itimerspec *new_setting, struct itimerspec *old_setting) { + ktime_t exp; + if (!rtcdev) return -ENOTSUPP; + if (flags & ~TIMER_ABSTIME) + return -EINVAL; + if (old_setting) alarm_timer_get(timr, old_setting); @@ -597,8 +602,16 @@ static int alarm_timer_set(struct k_itimer *timr, int flags, /* start the timer */ timr->it.alarm.interval = timespec_to_ktime(new_setting->it_interval); - alarm_start(&timr->it.alarm.alarmtimer, - timespec_to_ktime(new_setting->it_value)); + exp = timespec_to_ktime(new_setting->it_value); + /* Convert (if necessary) to absolute time */ + if (flags != TIMER_ABSTIME) { + ktime_t now; + + now = alarm_bases[timr->it.alarm.alarmtimer.type].gettime(); + exp = ktime_add(now, exp); + } + + alarm_start(&timr->it.alarm.alarmtimer, exp); return 0; } @@ -730,6 +743,9 @@ static int alarm_timer_nsleep(const clockid_t which_clock, int flags, if (!alarmtimer_get_rtcdev()) return -ENOTSUPP; + if (flags & ~TIMER_ABSTIME) + return -EINVAL; + if (!capable(CAP_WAKE_ALARM)) return -EPERM; From 91947d8cc553b3147140334a295218499b77ea92 Mon Sep 17 00:00:00 2001 From: Aaron Plattner Date: Tue, 8 Jul 2014 00:21:38 -0700 Subject: [PATCH 104/268] ALSA: hda - Add new GPU codec ID 0x10de0070 to snd-hda Vendor ID 0x10de0070 is used by a yet-to-be-named GPU chip. Signed-off-by: Aaron Plattner Signed-off-by: Takashi Iwai --- sound/pci/hda/patch_hdmi.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/sound/pci/hda/patch_hdmi.c b/sound/pci/hda/patch_hdmi.c index 4fe876b65fdaa..ba4ca52072ff7 100644 --- a/sound/pci/hda/patch_hdmi.c +++ b/sound/pci/hda/patch_hdmi.c @@ -3337,6 +3337,7 @@ static const struct hda_codec_preset snd_hda_preset_hdmi[] = { { .id = 0x10de0051, .name = "GPU 51 HDMI/DP", .patch = patch_nvhdmi }, { .id = 0x10de0060, .name = "GPU 60 HDMI/DP", .patch = patch_nvhdmi }, { .id = 0x10de0067, .name = "MCP67 HDMI", .patch = patch_nvhdmi_2ch }, +{ .id = 0x10de0070, .name = "GPU 70 HDMI/DP", .patch = patch_nvhdmi }, { .id = 0x10de0071, .name = "GPU 71 HDMI/DP", .patch = patch_nvhdmi }, { .id = 0x10de8001, .name = "MCP73 HDMI", .patch = patch_nvhdmi_2ch }, { .id = 0x11069f80, .name = "VX900 HDMI/DP", .patch = patch_via_hdmi }, @@ -3394,6 +3395,7 @@ MODULE_ALIAS("snd-hda-codec-id:10de0044"); MODULE_ALIAS("snd-hda-codec-id:10de0051"); MODULE_ALIAS("snd-hda-codec-id:10de0060"); MODULE_ALIAS("snd-hda-codec-id:10de0067"); +MODULE_ALIAS("snd-hda-codec-id:10de0070"); MODULE_ALIAS("snd-hda-codec-id:10de0071"); MODULE_ALIAS("snd-hda-codec-id:10de8001"); MODULE_ALIAS("snd-hda-codec-id:11069f80"); From d45b3279a5a2252cafcd665bbf2db8c9b31ef783 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 8 Jul 2014 12:25:28 +0200 Subject: [PATCH 105/268] block: don't assume last put of shared tags is for the host There is no inherent reason why the last put of a tag structure must be the one for the Scsi_Host, as device model objects can be held for arbitrary periods. Merge blk_free_tags and __blk_free_tags into a single funtion that just release a references and get rid of the BUG() when the host reference wasn't the last. Signed-off-by: Christoph Hellwig Cc: stable@kernel.org Signed-off-by: Jens Axboe --- block/blk-tag.c | 33 +++++++-------------------------- 1 file changed, 7 insertions(+), 26 deletions(-) diff --git a/block/blk-tag.c b/block/blk-tag.c index 3f33d86722688..a185b86741e5f 100644 --- a/block/blk-tag.c +++ b/block/blk-tag.c @@ -27,18 +27,15 @@ struct request *blk_queue_find_tag(struct request_queue *q, int tag) EXPORT_SYMBOL(blk_queue_find_tag); /** - * __blk_free_tags - release a given set of tag maintenance info + * blk_free_tags - release a given set of tag maintenance info * @bqt: the tag map to free * - * Tries to free the specified @bqt. Returns true if it was - * actually freed and false if there are still references using it + * Drop the reference count on @bqt and frees it when the last reference + * is dropped. */ -static int __blk_free_tags(struct blk_queue_tag *bqt) +void blk_free_tags(struct blk_queue_tag *bqt) { - int retval; - - retval = atomic_dec_and_test(&bqt->refcnt); - if (retval) { + if (atomic_dec_and_test(&bqt->refcnt)) { BUG_ON(find_first_bit(bqt->tag_map, bqt->max_depth) < bqt->max_depth); @@ -50,9 +47,8 @@ static int __blk_free_tags(struct blk_queue_tag *bqt) kfree(bqt); } - - return retval; } +EXPORT_SYMBOL(blk_free_tags); /** * __blk_queue_free_tags - release tag maintenance info @@ -69,27 +65,12 @@ void __blk_queue_free_tags(struct request_queue *q) if (!bqt) return; - __blk_free_tags(bqt); + blk_free_tags(bqt); q->queue_tags = NULL; queue_flag_clear_unlocked(QUEUE_FLAG_QUEUED, q); } -/** - * blk_free_tags - release a given set of tag maintenance info - * @bqt: the tag map to free - * - * For externally managed @bqt frees the map. Callers of this - * function must guarantee to have released all the queues that - * might have been using this tag map. - */ -void blk_free_tags(struct blk_queue_tag *bqt) -{ - if (unlikely(!__blk_free_tags(bqt))) - BUG(); -} -EXPORT_SYMBOL(blk_free_tags); - /** * blk_queue_free_tags - release tag maintenance info * @q: the request queue for the device From 0d461e1b087048b0cc37c9d7b351649578c507b4 Mon Sep 17 00:00:00 2001 From: Gregory CLEMENT Date: Fri, 4 Jul 2014 16:22:16 +0200 Subject: [PATCH 106/268] ARM: mvebu: Fix the operand list in the inline asm of armada_370_xp_pmsu_idle_enter In the inline asm part of the function armada_370_xp_pmsu_idle_enter() the input operand was used. The intent here was to let the compiler choose this register so it could do the optimization it needed. However an input operand is not supposed to be modified by the inline asm code. This can lead to improper generated instructions. In some case generated instruction the compiler made the choice to reuse the same register to store the return value. But in the assembly part this register was modified, so it can lead to return an wrong value. The fix is to use a clobber. Thanks to this the compiler will know that the value of this register will be modified. Signed-off-by: Gregory CLEMENT Reviewed-by: Thomas Petazzoni Link: https://lkml.kernel.org/r/1404483736-16938-1-git-send-email-gregory.clement@free-electrons.com Signed-off-by: Jason Cooper --- arch/arm/mach-mvebu/pmsu.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/arch/arm/mach-mvebu/pmsu.c b/arch/arm/mach-mvebu/pmsu.c index a1d407c0febe9..25aa8237d6684 100644 --- a/arch/arm/mach-mvebu/pmsu.c +++ b/arch/arm/mach-mvebu/pmsu.c @@ -201,12 +201,12 @@ static noinline int do_armada_370_xp_cpu_suspend(unsigned long deepidle) /* Test the CR_C bit and set it if it was cleared */ asm volatile( - "mrc p15, 0, %0, c1, c0, 0 \n\t" - "tst %0, #(1 << 2) \n\t" - "orreq %0, %0, #(1 << 2) \n\t" - "mcreq p15, 0, %0, c1, c0, 0 \n\t" + "mrc p15, 0, r0, c1, c0, 0 \n\t" + "tst r0, #(1 << 2) \n\t" + "orreq r0, r0, #(1 << 2) \n\t" + "mcreq p15, 0, r0, c1, c0, 0 \n\t" "isb " - : : "r" (0)); + : : : "r0"); pr_warn("Failed to suspend the system\n"); From a728b977429383b3fe92b6e3bff9e69365609e0f Mon Sep 17 00:00:00 2001 From: Ezequiel Garcia Date: Tue, 8 Jul 2014 10:37:37 -0300 Subject: [PATCH 107/268] ARM: mvebu: Fix coherency bus notifiers by using separate notifiers Currently, the coherency fabric support registers two bus notifiers; one for platform, one for pci bus types, with the same notifier block. However, this is illegal and can cause serious issues: the notifier block is also a link in the notifier list and cannot be inserted twice. This commit fixes this by using different notifier blocks (with the same notifier callback) to set the platform and pci bus types notifiers. Fixes: b0063aad5dd8 ("ARM: mvebu: use hardware I/O coherency also for PCI devices") Reported-by: Paolo Pisati Signed-off-by: Ezequiel Garcia Link: https://lkml.kernel.org/r/1404826657-6977-1-git-send-email-ezequiel.garcia@free-electrons.com Signed-off-by: Jason Cooper --- arch/arm/mach-mvebu/coherency.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/arch/arm/mach-mvebu/coherency.c b/arch/arm/mach-mvebu/coherency.c index 477202fd39cc0..2bdc3233abe2b 100644 --- a/arch/arm/mach-mvebu/coherency.c +++ b/arch/arm/mach-mvebu/coherency.c @@ -292,6 +292,10 @@ static struct notifier_block mvebu_hwcc_nb = { .notifier_call = mvebu_hwcc_notifier, }; +static struct notifier_block mvebu_hwcc_pci_nb = { + .notifier_call = mvebu_hwcc_notifier, +}; + static void __init armada_370_coherency_init(struct device_node *np) { struct resource res; @@ -427,7 +431,7 @@ static int __init coherency_pci_init(void) { if (coherency_available()) bus_register_notifier(&pci_bus_type, - &mvebu_hwcc_nb); + &mvebu_hwcc_pci_nb); return 0; } From f50b407653f64e76d1c9abda61d0d85cde3ca9ca Mon Sep 17 00:00:00 2001 From: David Vrabel Date: Wed, 2 Jul 2014 16:09:14 +0100 Subject: [PATCH 108/268] xen-netfront: don't nest queue locks in xennet_connect() The nesting of the per-queue rx_lock and tx_lock in xennet_connect() is confusing to both humans and lockdep. The locking is safe because this is the only place where the locks are nested in this way but lockdep still warns. Instead of adding the missing lockdep annotations, refactor the locking to avoid the confusing nesting. This is still safe, because the xenbus connection state changes are all serialized by the xenwatch thread. Signed-off-by: David Vrabel Reported-by: Sander Eikelenboom Signed-off-by: David S. Miller --- drivers/net/xen-netfront.c | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/drivers/net/xen-netfront.c b/drivers/net/xen-netfront.c index 2ccb4a02368b9..6a37d62de40b6 100644 --- a/drivers/net/xen-netfront.c +++ b/drivers/net/xen-netfront.c @@ -2046,13 +2046,15 @@ static int xennet_connect(struct net_device *dev) /* By now, the queue structures have been set up */ for (j = 0; j < num_queues; ++j) { queue = &np->queues[j]; - spin_lock_bh(&queue->rx_lock); - spin_lock_irq(&queue->tx_lock); /* Step 1: Discard all pending TX packet fragments. */ + spin_lock_irq(&queue->tx_lock); xennet_release_tx_bufs(queue); + spin_unlock_irq(&queue->tx_lock); /* Step 2: Rebuild the RX buffer freelist and the RX ring itself. */ + spin_lock_bh(&queue->rx_lock); + for (requeue_idx = 0, i = 0; i < NET_RX_RING_SIZE; i++) { skb_frag_t *frag; const struct page *page; @@ -2076,6 +2078,8 @@ static int xennet_connect(struct net_device *dev) } queue->rx.req_prod_pvt = requeue_idx; + + spin_unlock_bh(&queue->rx_lock); } /* @@ -2087,13 +2091,17 @@ static int xennet_connect(struct net_device *dev) netif_carrier_on(np->netdev); for (j = 0; j < num_queues; ++j) { queue = &np->queues[j]; + notify_remote_via_irq(queue->tx_irq); if (queue->tx_irq != queue->rx_irq) notify_remote_via_irq(queue->rx_irq); - xennet_tx_buf_gc(queue); - xennet_alloc_rx_buffers(queue); + spin_lock_irq(&queue->tx_lock); + xennet_tx_buf_gc(queue); spin_unlock_irq(&queue->tx_lock); + + spin_lock_bh(&queue->rx_lock); + xennet_alloc_rx_buffers(queue); spin_unlock_bh(&queue->rx_lock); } From f9feb1e6a25f9e197f9e6e6cb04bf04d2cccff93 Mon Sep 17 00:00:00 2001 From: David Vrabel Date: Wed, 2 Jul 2014 16:09:15 +0100 Subject: [PATCH 109/268] xen-netfront: call netif_carrier_off() only once when disconnecting In xennet_disconnect_backend(), netif_carrier_off() was called once per queue when it needs to only be called once. The queue locking around the netif_carrier_off() call looked very odd. I think they were supposed to synchronize any NAPI instances with the expectation that no further NAPI instances would be scheduled because of the carrier being off (see the check in xennet_rx_interrupt()). But I can't easily tell if this works correctly. Instead, add a napi_synchronize() call after disabling the interrupts. This is obviously correct as with no Rx interrupts, no further NAPI instances will be scheduled. Signed-off-by: David Vrabel Signed-off-by: David S. Miller --- drivers/net/xen-netfront.c | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/drivers/net/xen-netfront.c b/drivers/net/xen-netfront.c index 6a37d62de40b6..055222bae6e44 100644 --- a/drivers/net/xen-netfront.c +++ b/drivers/net/xen-netfront.c @@ -1439,16 +1439,11 @@ static void xennet_disconnect_backend(struct netfront_info *info) unsigned int i = 0; unsigned int num_queues = info->netdev->real_num_tx_queues; + netif_carrier_off(info->netdev); + for (i = 0; i < num_queues; ++i) { struct netfront_queue *queue = &info->queues[i]; - /* Stop old i/f to prevent errors whilst we rebuild the state. */ - spin_lock_bh(&queue->rx_lock); - spin_lock_irq(&queue->tx_lock); - netif_carrier_off(queue->info->netdev); - spin_unlock_irq(&queue->tx_lock); - spin_unlock_bh(&queue->rx_lock); - if (queue->tx_irq && (queue->tx_irq == queue->rx_irq)) unbind_from_irqhandler(queue->tx_irq, queue); if (queue->tx_irq && (queue->tx_irq != queue->rx_irq)) { @@ -1458,6 +1453,8 @@ static void xennet_disconnect_backend(struct netfront_info *info) queue->tx_evtchn = queue->rx_evtchn = 0; queue->tx_irq = queue->rx_irq = 0; + napi_synchronize(&queue->napi); + /* End access and free the pages */ xennet_end_access(queue->tx_ring_ref, queue->tx.sring); xennet_end_access(queue->rx_ring_ref, queue->rx.sring); From 74adf83f5d7720925499b4938f930591f947b660 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 18 Jun 2014 11:07:03 +0200 Subject: [PATCH 110/268] nfs: only show Posix ACLs in listxattr if actually present The big ACL switched nfs to use generic_listxattr, which calls all existing ->list handlers. Add a custom .listxattr implementation that only lists the ACLs if they actually are present on the given inode. Signed-off-by: Christoph Hellwig Reported-by: Philippe Troin Tested-by: Philippe Troin Fixes: 013cdf1088d7 (nfs: use generic posix ACL infrastructure ...) Cc: stable@vger.kernel.org # 3.14+ Signed-off-by: Trond Myklebust --- fs/nfs/nfs3acl.c | 43 +++++++++++++++++++++++++++++++++++++++++++ fs/nfs/nfs3proc.c | 4 ++-- 2 files changed, 45 insertions(+), 2 deletions(-) diff --git a/fs/nfs/nfs3acl.c b/fs/nfs/nfs3acl.c index 871d6eda8dba1..8f854dde4150e 100644 --- a/fs/nfs/nfs3acl.c +++ b/fs/nfs/nfs3acl.c @@ -247,3 +247,46 @@ const struct xattr_handler *nfs3_xattr_handlers[] = { &posix_acl_default_xattr_handler, NULL, }; + +static int +nfs3_list_one_acl(struct inode *inode, int type, const char *name, void *data, + size_t size, ssize_t *result) +{ + struct posix_acl *acl; + char *p = data + *result; + + acl = get_acl(inode, type); + if (!acl) + return 0; + + posix_acl_release(acl); + + *result += strlen(name); + *result += 1; + if (!size) + return 0; + if (*result > size) + return -ERANGE; + + strcpy(p, name); + return 0; +} + +ssize_t +nfs3_listxattr(struct dentry *dentry, char *data, size_t size) +{ + struct inode *inode = dentry->d_inode; + ssize_t result = 0; + int error; + + error = nfs3_list_one_acl(inode, ACL_TYPE_ACCESS, + POSIX_ACL_XATTR_ACCESS, data, size, &result); + if (error) + return error; + + error = nfs3_list_one_acl(inode, ACL_TYPE_DEFAULT, + POSIX_ACL_XATTR_DEFAULT, data, size, &result); + if (error) + return error; + return result; +} diff --git a/fs/nfs/nfs3proc.c b/fs/nfs/nfs3proc.c index e7daa42bbc86e..f0afa291fd588 100644 --- a/fs/nfs/nfs3proc.c +++ b/fs/nfs/nfs3proc.c @@ -885,7 +885,7 @@ static const struct inode_operations nfs3_dir_inode_operations = { .getattr = nfs_getattr, .setattr = nfs_setattr, #ifdef CONFIG_NFS_V3_ACL - .listxattr = generic_listxattr, + .listxattr = nfs3_listxattr, .getxattr = generic_getxattr, .setxattr = generic_setxattr, .removexattr = generic_removexattr, @@ -899,7 +899,7 @@ static const struct inode_operations nfs3_file_inode_operations = { .getattr = nfs_getattr, .setattr = nfs_setattr, #ifdef CONFIG_NFS_V3_ACL - .listxattr = generic_listxattr, + .listxattr = nfs3_listxattr, .getxattr = generic_getxattr, .setxattr = generic_setxattr, .removexattr = generic_removexattr, From b6e195fd4f663a7a97183feddb4ec27f0a5a00ec Mon Sep 17 00:00:00 2001 From: Alexander Aring Date: Thu, 3 Jul 2014 23:13:19 +0200 Subject: [PATCH 111/268] MAINTAINERS: change IEEE 802.15.4 maintainer This patch changes the IEEE 802.15.4 subsystem maintainer to Alexander Aring. We discussed this change before via e-mail and I collected the acks from the current maintainers. Signed-off-by: Alexander Aring Acked-by: Dmitry Eremin-Solenikov Acked-by: Alexander Smirnov Signed-off-by: David S. Miller --- MAINTAINERS | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/MAINTAINERS b/MAINTAINERS index ebc812827c63a..23863ce069966 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -4476,8 +4476,7 @@ S: Supported F: drivers/idle/i7300_idle.c IEEE 802.15.4 SUBSYSTEM -M: Alexander Smirnov -M: Dmitry Eremin-Solenikov +M: Alexander Aring L: linux-zigbee-devel@lists.sourceforge.net (moderated for non-subscribers) W: http://apps.sourceforge.net/trac/linux-zigbee T: git git://git.kernel.org/pub/scm/linux/kernel/git/lowpan/lowpan.git From a97e8027b1d28eafe6bafe062556c1ec926a49c6 Mon Sep 17 00:00:00 2001 From: Matthias Brugger Date: Thu, 3 Jul 2014 13:58:52 +0200 Subject: [PATCH 112/268] irqchip: gic: Add support for cortex a7 compatible string Patch 0a68214b "ARM: DT: Add binding for GIC virtualization extentions (VGIC)" added the "arm,cortex-a7-gic" compatible string, but the corresponding IRQCHIP_DECLARE was never added to the gic driver. To let real Cortex-A7 SoCs use it, add the necessary declaration to the device driver. Signed-off-by: Matthias Brugger Link: https://lkml.kernel.org/r/1404388732-28890-1-git-send-email-matthias.bgg@gmail.com Fixes: 0a68214b76ca ("ARM: DT: Add binding for GIC virtualization extentions (VGIC)") Cc: # v3.5+ Signed-off-by: Jason Cooper --- drivers/irqchip/irq-gic.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/irqchip/irq-gic.c b/drivers/irqchip/irq-gic.c index 7e11c9d6ae8c8..aadd6f5723cb6 100644 --- a/drivers/irqchip/irq-gic.c +++ b/drivers/irqchip/irq-gic.c @@ -1073,6 +1073,7 @@ gic_of_init(struct device_node *node, struct device_node *parent) } IRQCHIP_DECLARE(cortex_a15_gic, "arm,cortex-a15-gic", gic_of_init); IRQCHIP_DECLARE(cortex_a9_gic, "arm,cortex-a9-gic", gic_of_init); +IRQCHIP_DECLARE(cortex_a7_gic, "arm,cortex-a7-gic", gic_of_init); IRQCHIP_DECLARE(msm_8660_qgic, "qcom,msm-8660-qgic", gic_of_init); IRQCHIP_DECLARE(msm_qgic2, "qcom,msm-qgic2", gic_of_init); From 29322d0db98e5a84f5cc6a55655bee3dc4ffb5ab Mon Sep 17 00:00:00 2001 From: Jon Paul Maloy Date: Sat, 5 Jul 2014 13:44:13 -0400 Subject: [PATCH 113/268] tipc: fix bug in multicast/broadcast message reassembly Since commit 37e22164a8a3c39bdad45aa463b1e69a1fdf4110 ("tipc: rename and move message reassembly function") reassembly of long broadcast messages has been broken. This is because we test for a non-NULL return value of the *buf parameter as criteria for succesful reassembly. However, this parameter is left defined even after reception of the first fragment, when reassebly is still incomplete. This leads to a kernel crash as soon as a the first fragment of a long broadcast message is received. We fix this with this commit, by implementing a stricter behavior of the function and its return values. This commit should be applied to both net and net-next. Signed-off-by: Jon Maloy Acked-by: Ying Xue Signed-off-by: David S. Miller --- net/tipc/msg.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/net/tipc/msg.c b/net/tipc/msg.c index 8be6e94a1ca97..0a37a472c29f9 100644 --- a/net/tipc/msg.c +++ b/net/tipc/msg.c @@ -101,9 +101,11 @@ int tipc_msg_build(struct tipc_msg *hdr, struct iovec const *msg_sect, } /* tipc_buf_append(): Append a buffer to the fragment list of another buffer - * Let first buffer become head buffer - * Returns 1 and sets *buf to headbuf if chain is complete, otherwise 0 - * Leaves headbuf pointer at NULL if failure + * @*headbuf: in: NULL for first frag, otherwise value returned from prev call + * out: set when successful non-complete reassembly, otherwise NULL + * @*buf: in: the buffer to append. Always defined + * out: head buf after sucessful complete reassembly, otherwise NULL + * Returns 1 when reassembly complete, otherwise 0 */ int tipc_buf_append(struct sk_buff **headbuf, struct sk_buff **buf) { @@ -122,6 +124,7 @@ int tipc_buf_append(struct sk_buff **headbuf, struct sk_buff **buf) goto out_free; head = *headbuf = frag; skb_frag_list_init(head); + *buf = NULL; return 0; } if (!head) @@ -150,5 +153,7 @@ int tipc_buf_append(struct sk_buff **headbuf, struct sk_buff **buf) out_free: pr_warn_ratelimited("Unable to build fragment list\n"); kfree_skb(*buf); + kfree_skb(*headbuf); + *buf = *headbuf = NULL; return 0; } From 85b722d760f0de77c4bb371b77202784671f5a54 Mon Sep 17 00:00:00 2001 From: Rickard Strandqvist Date: Sun, 6 Jul 2014 14:04:37 +0200 Subject: [PATCH 114/268] isdn: hisax: l3ni1.c: Fix for possible null pointer dereference There is otherwise a risk of a possible null pointer dereference. Was largely found by using a static code analysis program called cppcheck. Signed-off-by: Rickard Strandqvist --- drivers/isdn/hisax/l3ni1.c | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/drivers/isdn/hisax/l3ni1.c b/drivers/isdn/hisax/l3ni1.c index 0df6691d045c2..8dc791bfaa6fd 100644 --- a/drivers/isdn/hisax/l3ni1.c +++ b/drivers/isdn/hisax/l3ni1.c @@ -2059,13 +2059,17 @@ static int l3ni1_cmd_global(struct PStack *st, isdn_ctrl *ic) memcpy(p, ic->parm.ni1_io.data, ic->parm.ni1_io.datalen); /* copy data */ l = (p - temp) + ic->parm.ni1_io.datalen; /* total length */ - if (ic->parm.ni1_io.timeout > 0) - if (!(pc = ni1_new_l3_process(st, -1))) - { free_invoke_id(st, id); + if (ic->parm.ni1_io.timeout > 0) { + pc = ni1_new_l3_process(st, -1); + if (!pc) { + free_invoke_id(st, id); return (-2); } - pc->prot.ni1.ll_id = ic->parm.ni1_io.ll_id; /* remember id */ - pc->prot.ni1.proc = ic->parm.ni1_io.proc; /* and procedure */ + /* remember id */ + pc->prot.ni1.ll_id = ic->parm.ni1_io.ll_id; + /* and procedure */ + pc->prot.ni1.proc = ic->parm.ni1_io.proc; + } if (!(skb = l3_alloc_skb(l))) { free_invoke_id(st, id); From 233b43010330ed8cf39cf636880017df3e33f102 Mon Sep 17 00:00:00 2001 From: Hariprasad S Date: Mon, 23 Jun 2014 19:12:35 +0530 Subject: [PATCH 115/268] RDMA/cxgb4: Fix skb_leak in reject_cr() Based on origninal work by Steve Wise Signed-off-by: Steve Wise Signed-off-by: Hariprasad Shenai Signed-off-by: Roland Dreier --- drivers/infiniband/hw/cxgb4/cm.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/infiniband/hw/cxgb4/cm.c b/drivers/infiniband/hw/cxgb4/cm.c index 5e153f6d4b48f..cc36e9bf9dea3 100644 --- a/drivers/infiniband/hw/cxgb4/cm.c +++ b/drivers/infiniband/hw/cxgb4/cm.c @@ -2180,7 +2180,6 @@ static void reject_cr(struct c4iw_dev *dev, u32 hwtid, struct sk_buff *skb) PDBG("%s c4iw_dev %p tid %u\n", __func__, dev, hwtid); BUG_ON(skb_cloned(skb)); skb_trim(skb, sizeof(struct cpl_tid_release)); - skb_get(skb); release_tid(&dev->rdev, hwtid, skb); return; } From 5dab6d3ab1abed99be6166b844af58237d52a135 Mon Sep 17 00:00:00 2001 From: Hariprasad S Date: Mon, 23 Jun 2014 19:12:36 +0530 Subject: [PATCH 116/268] RDMA/cxgb4: Clean up connection on ARP error Based on origninal work by Steve Wise Signed-off-by: Steve Wise Signed-off-by: Hariprasad Shenai Signed-off-by: Roland Dreier --- drivers/infiniband/hw/cxgb4/cm.c | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/cxgb4/cm.c b/drivers/infiniband/hw/cxgb4/cm.c index cc36e9bf9dea3..6a93280250d16 100644 --- a/drivers/infiniband/hw/cxgb4/cm.c +++ b/drivers/infiniband/hw/cxgb4/cm.c @@ -432,8 +432,17 @@ static void arp_failure_discard(void *handle, struct sk_buff *skb) */ static void act_open_req_arp_failure(void *handle, struct sk_buff *skb) { + struct c4iw_ep *ep = handle; + printk(KERN_ERR MOD "ARP failure duing connect\n"); kfree_skb(skb); + connect_reply_upcall(ep, -EHOSTUNREACH); + state_set(&ep->com, DEAD); + remove_handle(ep->com.dev, &ep->com.dev->atid_idr, ep->atid); + cxgb4_free_atid(ep->com.dev->rdev.lldi.tids, ep->atid); + dst_release(ep->dst); + cxgb4_l2t_release(ep->l2t); + c4iw_put_ep(&ep->com); } /* @@ -658,7 +667,7 @@ static int send_connect(struct c4iw_ep *ep) opt2 |= T5_OPT_2_VALID; opt2 |= V_CONG_CNTRL(CONG_ALG_TAHOE); } - t4_set_arp_err_handler(skb, NULL, act_open_req_arp_failure); + t4_set_arp_err_handler(skb, ep, act_open_req_arp_failure); if (is_t4(ep->com.dev->rdev.lldi.adapter_type)) { if (ep->com.remote_addr.ss_family == AF_INET) { From 6b54d54dea82ae214e4a45a503c4ef755a8ecee8 Mon Sep 17 00:00:00 2001 From: Steve Wise Date: Tue, 8 Jul 2014 10:20:35 -0500 Subject: [PATCH 117/268] RDMA/cxgb4: Initialize the device status page The status page is mapped to user processes and allows sharing the device state between the kernel and user processes. This state isn't getting initialized and thus intermittently causes problems. Namely, the user process can mistakenly think the user doorbell writes are disabled which causes SQ work requests to never get fetched by HW. Fixes: 05eb23893c2c ("cxgb4/iw_cxgb4: Doorbell Drop Avoidance Bug Fixes"). Signed-off-by: Steve Wise Cc: # v3.15 Signed-off-by: Roland Dreier --- drivers/infiniband/hw/cxgb4/device.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/infiniband/hw/cxgb4/device.c b/drivers/infiniband/hw/cxgb4/device.c index dd93aadc996e1..16b75de4b2def 100644 --- a/drivers/infiniband/hw/cxgb4/device.c +++ b/drivers/infiniband/hw/cxgb4/device.c @@ -696,6 +696,7 @@ static int c4iw_rdev_open(struct c4iw_rdev *rdev) pr_err(MOD "error allocating status page\n"); goto err4; } + rdev->status_page->db_off = 0; return 0; err4: c4iw_rqtpool_destroy(rdev); From e0056593b61253f1a8a9941dacda22e73b963cdc Mon Sep 17 00:00:00 2001 From: Dmitry Popov Date: Sat, 5 Jul 2014 02:26:37 +0400 Subject: [PATCH 118/268] ip_tunnel: fix ip_tunnel_lookup This patch fixes 3 similar bugs where incoming packets might be routed into wrong non-wildcard tunnels: 1) Consider the following setup: ip address add 1.1.1.1/24 dev eth0 ip address add 1.1.1.2/24 dev eth0 ip tunnel add ipip1 remote 2.2.2.2 local 1.1.1.1 mode ipip dev eth0 ip link set ipip1 up Incoming ipip packets from 2.2.2.2 were routed into ipip1 even if it has dst = 1.1.1.2. Moreover even if there was wildcard tunnel like ip tunnel add ipip0 remote 2.2.2.2 local any mode ipip dev eth0 but it was created before explicit one (with local 1.1.1.1), incoming ipip packets with src = 2.2.2.2 and dst = 1.1.1.2 were still routed into ipip1. Same issue existed with all tunnels that use ip_tunnel_lookup (gre, vti) 2) ip address add 1.1.1.1/24 dev eth0 ip tunnel add ipip1 remote 2.2.146.85 local 1.1.1.1 mode ipip dev eth0 ip link set ipip1 up Incoming ipip packets with dst = 1.1.1.1 were routed into ipip1, no matter what src address is. Any remote ip address which has ip_tunnel_hash = 0 raised this issue, 2.2.146.85 is just an example, there are more than 4 million of them. And again, wildcard tunnel like ip tunnel add ipip0 remote any local 1.1.1.1 mode ipip dev eth0 wouldn't be ever matched if it was created before explicit tunnel like above. Gre & vti tunnels had the same issue. 3) ip address add 1.1.1.1/24 dev eth0 ip tunnel add gre1 remote 2.2.146.84 local 1.1.1.1 key 1 mode gre dev eth0 ip link set gre1 up Any incoming gre packet with key = 1 were routed into gre1, no matter what src/dst addresses are. Any remote ip address which has ip_tunnel_hash = 0 raised the issue, 2.2.146.84 is just an example, there are more than 4 million of them. Wildcard tunnel like ip tunnel add gre2 remote any local any key 1 mode gre dev eth0 wouldn't be ever matched if it was created before explicit tunnel like above. All this stuff happened because while looking for a wildcard tunnel we didn't check that matched tunnel is a wildcard one. Fixed. Signed-off-by: Dmitry Popov Signed-off-by: David S. Miller --- net/ipv4/ip_tunnel.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/net/ipv4/ip_tunnel.c b/net/ipv4/ip_tunnel.c index 54b6731dab559..6f9de61dce5f9 100644 --- a/net/ipv4/ip_tunnel.c +++ b/net/ipv4/ip_tunnel.c @@ -169,6 +169,7 @@ struct ip_tunnel *ip_tunnel_lookup(struct ip_tunnel_net *itn, hlist_for_each_entry_rcu(t, head, hash_node) { if (remote != t->parms.iph.daddr || + t->parms.iph.saddr != 0 || !(t->dev->flags & IFF_UP)) continue; @@ -185,10 +186,11 @@ struct ip_tunnel *ip_tunnel_lookup(struct ip_tunnel_net *itn, head = &itn->tunnels[hash]; hlist_for_each_entry_rcu(t, head, hash_node) { - if ((local != t->parms.iph.saddr && - (local != t->parms.iph.daddr || - !ipv4_is_multicast(local))) || - !(t->dev->flags & IFF_UP)) + if ((local != t->parms.iph.saddr || t->parms.iph.daddr != 0) && + (local != t->parms.iph.daddr || !ipv4_is_multicast(local))) + continue; + + if (!(t->dev->flags & IFF_UP)) continue; if (!ip_tunnel_key_match(&t->parms, flags, key)) @@ -205,6 +207,8 @@ struct ip_tunnel *ip_tunnel_lookup(struct ip_tunnel_net *itn, hlist_for_each_entry_rcu(t, head, hash_node) { if (t->parms.i_key != key || + t->parms.iph.saddr != 0 || + t->parms.iph.daddr != 0 || !(t->dev->flags & IFF_UP)) continue; From 36beddc272c111689f3042bf3d10a64d8a805f93 Mon Sep 17 00:00:00 2001 From: Andrey Utkin Date: Mon, 7 Jul 2014 23:22:50 +0300 Subject: [PATCH 119/268] appletalk: Fix socket referencing in skb Setting just skb->sk without taking its reference and setting a destructor is invalid. However, in the places where this was done, skb is used in a way not requiring skb->sk setting. So dropping the setting of skb->sk. Thanks to Eric Dumazet for correct solution. Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=79441 Reported-by: Ed Martin Signed-off-by: Andrey Utkin Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/appletalk/ddp.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/net/appletalk/ddp.c b/net/appletalk/ddp.c index 01a1082e02b31..bfcf6be1d665c 100644 --- a/net/appletalk/ddp.c +++ b/net/appletalk/ddp.c @@ -1489,8 +1489,6 @@ static int atalk_rcv(struct sk_buff *skb, struct net_device *dev, goto drop; /* Queue packet (standard) */ - skb->sk = sock; - if (sock_queue_rcv_skb(sock, skb) < 0) goto drop; @@ -1644,7 +1642,6 @@ static int atalk_sendmsg(struct kiocb *iocb, struct socket *sock, struct msghdr if (!skb) goto out; - skb->sk = sk; skb_reserve(skb, ddp_dl->header_length); skb_reserve(skb, dev->hard_header_len); skb->dev = dev; From 0b0302449110ca5ca4350458ed57b971fcb78ec1 Mon Sep 17 00:00:00 2001 From: hayeswang Date: Tue, 8 Jul 2014 14:49:28 +0800 Subject: [PATCH 120/268] r8152: wake up the device before dumping the hw counter The device should be waked up from runtime suspend before dumping the hw counter. Signed-off-by: Hayes Wang Signed-off-by: David S. Miller --- drivers/net/usb/r8152.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/net/usb/r8152.c b/drivers/net/usb/r8152.c index 25431965a625a..a795ecf8d5b63 100644 --- a/drivers/net/usb/r8152.c +++ b/drivers/net/usb/r8152.c @@ -3204,8 +3204,13 @@ static void rtl8152_get_ethtool_stats(struct net_device *dev, struct r8152 *tp = netdev_priv(dev); struct tally_counter tally; + if (usb_autopm_get_interface(tp->intf) < 0) + return; + generic_ocp_read(tp, PLA_TALLYCNT, sizeof(tally), &tally, MCU_TYPE_PLA); + usb_autopm_put_interface(tp->intf); + data[0] = le64_to_cpu(tally.tx_packets); data[1] = le64_to_cpu(tally.rx_packets); data[2] = le64_to_cpu(tally.tx_errors); From fbc6daf19745b372c0d909e5d74ab02e42b70e51 Mon Sep 17 00:00:00 2001 From: Amir Vadai Date: Tue, 8 Jul 2014 11:28:12 +0300 Subject: [PATCH 121/268] net/mlx4_en: Ignore budget on TX napi polling It is recommended that TX work not count against the quota. The cost of TX packet liberation is a minute percentage of what it costs to process an RX frame. Furthermore, that SKB freeing makes memory available for other paths in the stack. Give the TX a larger budget and be more aggressive about cleaning up the Tx descriptors this budget could be changed using ethtool: $ ethtool -C eth1 tx-frames-irq Signed-off-by: Amir Vadai Signed-off-by: David S. Miller --- .../net/ethernet/mellanox/mlx4/en_ethtool.c | 7 +++++ .../net/ethernet/mellanox/mlx4/en_netdev.c | 1 + drivers/net/ethernet/mellanox/mlx4/en_tx.c | 28 +++++++++---------- drivers/net/ethernet/mellanox/mlx4/mlx4_en.h | 3 ++ 4 files changed, 24 insertions(+), 15 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c b/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c index fa1a069e14e6f..68d763d2d030d 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c @@ -417,6 +417,8 @@ static int mlx4_en_get_coalesce(struct net_device *dev, coal->tx_coalesce_usecs = priv->tx_usecs; coal->tx_max_coalesced_frames = priv->tx_frames; + coal->tx_max_coalesced_frames_irq = priv->tx_work_limit; + coal->rx_coalesce_usecs = priv->rx_usecs; coal->rx_max_coalesced_frames = priv->rx_frames; @@ -426,6 +428,7 @@ static int mlx4_en_get_coalesce(struct net_device *dev, coal->rx_coalesce_usecs_high = priv->rx_usecs_high; coal->rate_sample_interval = priv->sample_interval; coal->use_adaptive_rx_coalesce = priv->adaptive_rx_coal; + return 0; } @@ -434,6 +437,9 @@ static int mlx4_en_set_coalesce(struct net_device *dev, { struct mlx4_en_priv *priv = netdev_priv(dev); + if (!coal->tx_max_coalesced_frames_irq) + return -EINVAL; + priv->rx_frames = (coal->rx_max_coalesced_frames == MLX4_EN_AUTO_CONF) ? MLX4_EN_RX_COAL_TARGET : @@ -457,6 +463,7 @@ static int mlx4_en_set_coalesce(struct net_device *dev, priv->rx_usecs_high = coal->rx_coalesce_usecs_high; priv->sample_interval = coal->rate_sample_interval; priv->adaptive_rx_coal = coal->use_adaptive_rx_coalesce; + priv->tx_work_limit = coal->tx_max_coalesced_frames_irq; return mlx4_en_moderation_update(priv); } diff --git a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c index e94d96f0ef55f..7345c43b019e5 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c @@ -2473,6 +2473,7 @@ int mlx4_en_init_netdev(struct mlx4_en_dev *mdev, int port, MLX4_WQE_CTRL_SOLICITED); priv->num_tx_rings_p_up = mdev->profile.num_tx_rings_p_up; priv->tx_ring_num = prof->tx_ring_num; + priv->tx_work_limit = MLX4_EN_DEFAULT_TX_WORK; priv->tx_ring = kzalloc(sizeof(struct mlx4_en_tx_ring *) * MAX_TX_RINGS, GFP_KERNEL); diff --git a/drivers/net/ethernet/mellanox/mlx4/en_tx.c b/drivers/net/ethernet/mellanox/mlx4/en_tx.c index ac3dead3792cf..5045bab596338 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_tx.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_tx.c @@ -351,9 +351,8 @@ int mlx4_en_free_tx_buf(struct net_device *dev, struct mlx4_en_tx_ring *ring) return cnt; } -static int mlx4_en_process_tx_cq(struct net_device *dev, - struct mlx4_en_cq *cq, - int budget) +static bool mlx4_en_process_tx_cq(struct net_device *dev, + struct mlx4_en_cq *cq) { struct mlx4_en_priv *priv = netdev_priv(dev); struct mlx4_cq *mcq = &cq->mcq; @@ -372,9 +371,10 @@ static int mlx4_en_process_tx_cq(struct net_device *dev, int factor = priv->cqe_factor; u64 timestamp = 0; int done = 0; + int budget = priv->tx_work_limit; if (!priv->port_up) - return 0; + return true; index = cons_index & size_mask; cqe = &buf[(index << factor) + factor]; @@ -447,7 +447,7 @@ static int mlx4_en_process_tx_cq(struct net_device *dev, netif_tx_wake_queue(ring->tx_queue); ring->wake_queue++; } - return done; + return done < budget; } void mlx4_en_tx_irq(struct mlx4_cq *mcq) @@ -467,18 +467,16 @@ int mlx4_en_poll_tx_cq(struct napi_struct *napi, int budget) struct mlx4_en_cq *cq = container_of(napi, struct mlx4_en_cq, napi); struct net_device *dev = cq->dev; struct mlx4_en_priv *priv = netdev_priv(dev); - int done; + int clean_complete; - done = mlx4_en_process_tx_cq(dev, cq, budget); + clean_complete = mlx4_en_process_tx_cq(dev, cq); + if (!clean_complete) + return budget; - /* If we used up all the quota - we're probably not done yet... */ - if (done < budget) { - /* Done for now */ - napi_complete(napi); - mlx4_en_arm_cq(priv, cq); - return done; - } - return budget; + napi_complete(napi); + mlx4_en_arm_cq(priv, cq); + + return 0; } static struct mlx4_en_tx_desc *mlx4_en_bounce_to_desc(struct mlx4_en_priv *priv, diff --git a/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h b/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h index 624e1939e9ee3..d72a5a894fc6a 100644 --- a/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h +++ b/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h @@ -126,6 +126,8 @@ enum { #define MAX_TX_RINGS (MLX4_EN_MAX_TX_RING_P_UP * \ MLX4_EN_NUM_UP) +#define MLX4_EN_DEFAULT_TX_WORK 256 + /* Target number of packets to coalesce with interrupt moderation */ #define MLX4_EN_RX_COAL_TARGET 44 #define MLX4_EN_RX_COAL_TIME 0x10 @@ -543,6 +545,7 @@ struct mlx4_en_priv { __be32 ctrl_flags; u32 flags; u8 num_tx_rings_p_up; + u32 tx_work_limit; u32 tx_ring_num; u32 rx_ring_num; u32 rx_skb_size; From 4d12bc63ab5e48c1d78fa13883cf6fefcea3afb1 Mon Sep 17 00:00:00 2001 From: Thomas Petazzoni Date: Tue, 8 Jul 2014 10:49:43 +0200 Subject: [PATCH 122/268] net: mvneta: fix operation in 10 Mbit/s mode As reported by Maggie Mae Roxas, the mvneta driver doesn't behave properly in 10 Mbit/s mode. This is due to a misconfiguration of the MVNETA_GMAC_AUTONEG_CONFIG register: bit MVNETA_GMAC_CONFIG_MII_SPEED must be set for a 100 Mbit/s speed, but cleared for a 10 Mbit/s speed, which the driver was not properly doing. This commit adjusts that by setting the MVNETA_GMAC_CONFIG_MII_SPEED bit only in 100 Mbit/s mode, and relying on the fact that all the speed related bits of this register are cleared at the beginning of the mvneta_adjust_link() function. This problem exists since c5aff18204da0 ("net: mvneta: driver for Marvell Armada 370/XP network unit") which is the commit that introduced the mvneta driver in the kernel. Cc: # v3.8+ Fixes: c5aff18204da0 ("net: mvneta: driver for Marvell Armada 370/XP network unit") Reported-by: Maggie Mae Roxas Cc: Maggie Mae Roxas Signed-off-by: Thomas Petazzoni Signed-off-by: David S. Miller --- drivers/net/ethernet/marvell/mvneta.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/marvell/mvneta.c b/drivers/net/ethernet/marvell/mvneta.c index 45beca17fa50a..d49f08d220713 100644 --- a/drivers/net/ethernet/marvell/mvneta.c +++ b/drivers/net/ethernet/marvell/mvneta.c @@ -2529,7 +2529,7 @@ static void mvneta_adjust_link(struct net_device *ndev) if (phydev->speed == SPEED_1000) val |= MVNETA_GMAC_CONFIG_GMII_SPEED; - else + else if (phydev->speed == SPEED_100) val |= MVNETA_GMAC_CONFIG_MII_SPEED; mvreg_write(pp, MVNETA_GMAC_AUTONEG_CONFIG, val); From 0a1985879437d14bda8c90d0dae3455c467d7642 Mon Sep 17 00:00:00 2001 From: Thomas Fitzsimmons Date: Tue, 8 Jul 2014 19:44:07 -0400 Subject: [PATCH 123/268] net: mvneta: Fix big endian issue in mvneta_txq_desc_csum() This commit fixes the command value generated for CSUM calculation when running in big endian mode. The Ethernet protocol ID for IP was being unconditionally byte-swapped in the layer 3 protocol check (with swab16), which caused the mvneta driver to not function correctly in big endian mode. This patch byte-swaps the ID conditionally with htons. Cc: # v3.13+ Signed-off-by: Thomas Fitzsimmons Signed-off-by: David S. Miller --- drivers/net/ethernet/marvell/mvneta.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/marvell/mvneta.c b/drivers/net/ethernet/marvell/mvneta.c index d49f08d220713..dadd9a5f6323c 100644 --- a/drivers/net/ethernet/marvell/mvneta.c +++ b/drivers/net/ethernet/marvell/mvneta.c @@ -1207,7 +1207,7 @@ static u32 mvneta_txq_desc_csum(int l3_offs, int l3_proto, command = l3_offs << MVNETA_TX_L3_OFF_SHIFT; command |= ip_hdr_len << MVNETA_TX_IP_HLEN_SHIFT; - if (l3_proto == swab16(ETH_P_IP)) + if (l3_proto == htons(ETH_P_IP)) command |= MVNETA_TXD_IP_CSUM; else command |= MVNETA_TX_L3_IP6; From ac30ef832e6af0505b6f0251a6659adcfa74975e Mon Sep 17 00:00:00 2001 From: Ben Pfaff Date: Wed, 9 Jul 2014 10:31:22 -0700 Subject: [PATCH 124/268] netlink: Fix handling of error from netlink_dump(). netlink_dump() returns a negative errno value on error. Until now, netlink_recvmsg() directly recorded that negative value in sk->sk_err, but that's wrong since sk_err takes positive errno values. (This manifests as userspace receiving a positive return value from the recv() system call, falsely indicating success.) This bug was introduced in the commit that started checking the netlink_dump() return value, commit b44d211 (netlink: handle errors from netlink_dump()). Multithreaded Netlink dumps are one way to trigger this behavior in practice, as described in the commit message for the userspace workaround posted here: http://openvswitch.org/pipermail/dev/2014-June/042339.html This commit also fixes the same bug in netlink_poll(), introduced in commit cd1df525d (netlink: add flow control for memory mapped I/O). Signed-off-by: Ben Pfaff Signed-off-by: David S. Miller --- net/netlink/af_netlink.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index 15c731f03fa66..e6fac7e3db52e 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -636,7 +636,7 @@ static unsigned int netlink_poll(struct file *file, struct socket *sock, while (nlk->cb_running && netlink_dump_space(nlk)) { err = netlink_dump(sk); if (err < 0) { - sk->sk_err = err; + sk->sk_err = -err; sk->sk_error_report(sk); break; } @@ -2483,7 +2483,7 @@ static int netlink_recvmsg(struct kiocb *kiocb, struct socket *sock, atomic_read(&sk->sk_rmem_alloc) <= sk->sk_rcvbuf / 2) { ret = netlink_dump(sk); if (ret) { - sk->sk_err = ret; + sk->sk_err = -ret; sk->sk_error_report(sk); } } From b51ecea852b712618796d9eab8428a7d5f1f106f Mon Sep 17 00:00:00 2001 From: hayeswang Date: Wed, 9 Jul 2014 14:52:51 +0800 Subject: [PATCH 125/268] r8169: disable L23 For RTL8411, RTL8111G, RTL8402, RTL8105, and RTL8106, disable the feature of entering the L2/L3 link state of the PCIe. When the nic starts the process of entering the L2/L3 link state and the PCI reset occurs before the work is finished, the work would be queued and continue after the next the PCI reset occurs. This causes the device stays in L2/L3 link state, and the system couldn't find the device. Signed-off-by: Hayes Wang Acked-by: Francois Romieu Signed-off-by: David S. Miller --- drivers/net/ethernet/realtek/r8169.c | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) diff --git a/drivers/net/ethernet/realtek/r8169.c b/drivers/net/ethernet/realtek/r8169.c index be425ad5e8248..06bdc31a828dc 100644 --- a/drivers/net/ethernet/realtek/r8169.c +++ b/drivers/net/ethernet/realtek/r8169.c @@ -538,6 +538,7 @@ enum rtl_register_content { MagicPacket = (1 << 5), /* Wake up when receives a Magic Packet */ LinkUp = (1 << 4), /* Wake up when the cable connection is re-established */ Jumbo_En0 = (1 << 2), /* 8168 only. Reserved in the 8168b */ + Rdy_to_L23 = (1 << 1), /* L23 Enable */ Beacon_en = (1 << 0), /* 8168 only. Reserved in the 8168b */ /* Config4 register */ @@ -4897,6 +4898,21 @@ static void rtl_enable_clock_request(struct pci_dev *pdev) PCI_EXP_LNKCTL_CLKREQ_EN); } +static void rtl_pcie_state_l2l3_enable(struct rtl8169_private *tp, bool enable) +{ + void __iomem *ioaddr = tp->mmio_addr; + u8 data; + + data = RTL_R8(Config3); + + if (enable) + data |= Rdy_to_L23; + else + data &= ~Rdy_to_L23; + + RTL_W8(Config3, data); +} + #define R8168_CPCMD_QUIRK_MASK (\ EnableBist | \ Mac_dbgo_oe | \ @@ -5246,6 +5262,7 @@ static void rtl_hw_start_8411(struct rtl8169_private *tp) }; rtl_hw_start_8168f(tp); + rtl_pcie_state_l2l3_enable(tp, false); rtl_ephy_init(tp, e_info_8168f_1, ARRAY_SIZE(e_info_8168f_1)); @@ -5284,6 +5301,8 @@ static void rtl_hw_start_8168g_1(struct rtl8169_private *tp) rtl_w1w0_eri(tp, 0x2fc, ERIAR_MASK_0001, 0x01, 0x06, ERIAR_EXGMAC); rtl_w1w0_eri(tp, 0x1b0, ERIAR_MASK_0011, 0x0000, 0x1000, ERIAR_EXGMAC); + + rtl_pcie_state_l2l3_enable(tp, false); } static void rtl_hw_start_8168g_2(struct rtl8169_private *tp) @@ -5536,6 +5555,8 @@ static void rtl_hw_start_8105e_1(struct rtl8169_private *tp) RTL_W8(DLLPR, RTL_R8(DLLPR) | PFM_EN); rtl_ephy_init(tp, e_info_8105e_1, ARRAY_SIZE(e_info_8105e_1)); + + rtl_pcie_state_l2l3_enable(tp, false); } static void rtl_hw_start_8105e_2(struct rtl8169_private *tp) @@ -5571,6 +5592,8 @@ static void rtl_hw_start_8402(struct rtl8169_private *tp) rtl_eri_write(tp, 0xc0, ERIAR_MASK_0011, 0x0000, ERIAR_EXGMAC); rtl_eri_write(tp, 0xb8, ERIAR_MASK_0011, 0x0000, ERIAR_EXGMAC); rtl_w1w0_eri(tp, 0x0d4, ERIAR_MASK_0011, 0x0e00, 0xff00, ERIAR_EXGMAC); + + rtl_pcie_state_l2l3_enable(tp, false); } static void rtl_hw_start_8106(struct rtl8169_private *tp) @@ -5583,6 +5606,8 @@ static void rtl_hw_start_8106(struct rtl8169_private *tp) RTL_W32(MISC, (RTL_R32(MISC) | DISABLE_LAN_EN) & ~EARLY_TALLY_EN); RTL_W8(MCU, RTL_R8(MCU) | EN_NDP | EN_OOB_RESET); RTL_W8(DLLPR, RTL_R8(DLLPR) & ~PFM_EN); + + rtl_pcie_state_l2l3_enable(tp, false); } static void rtl_hw_start_8101(struct net_device *dev) From 6ef07a9f369742a7b18c77484411cff0bd790291 Mon Sep 17 00:00:00 2001 From: Sagi Grimberg Date: Sun, 8 Jun 2014 10:00:59 +0300 Subject: [PATCH 126/268] mlx5_core: Fix possible race between mr tree insert/delete In mlx5_core_destroy_mkey(), we must first remove the mr from the radix tree and then destroy it. Otherwise we might hit a race if the key was reallocated and we attempted to insert it to the radix tree. Also handle radix tree insert/delete failures. Signed-off-by: Sagi Grimberg Reviewed-by: Eli Cohen Signed-off-by: Roland Dreier --- drivers/net/ethernet/mellanox/mlx5/core/mr.c | 19 +++++++++++++++---- 1 file changed, 15 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/mr.c b/drivers/net/ethernet/mellanox/mlx5/core/mr.c index ba0401d4af502..184c3615f4799 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/mr.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/mr.c @@ -94,6 +94,11 @@ int mlx5_core_create_mkey(struct mlx5_core_dev *dev, struct mlx5_core_mr *mr, write_lock_irq(&table->lock); err = radix_tree_insert(&table->tree, mlx5_base_mkey(mr->key), mr); write_unlock_irq(&table->lock); + if (err) { + mlx5_core_warn(dev, "failed radix tree insert of mr 0x%x, %d\n", + mlx5_base_mkey(mr->key), err); + mlx5_core_destroy_mkey(dev, mr); + } return err; } @@ -104,12 +109,22 @@ int mlx5_core_destroy_mkey(struct mlx5_core_dev *dev, struct mlx5_core_mr *mr) struct mlx5_mr_table *table = &dev->priv.mr_table; struct mlx5_destroy_mkey_mbox_in in; struct mlx5_destroy_mkey_mbox_out out; + struct mlx5_core_mr *deleted_mr; unsigned long flags; int err; memset(&in, 0, sizeof(in)); memset(&out, 0, sizeof(out)); + write_lock_irqsave(&table->lock, flags); + deleted_mr = radix_tree_delete(&table->tree, mlx5_base_mkey(mr->key)); + write_unlock_irqrestore(&table->lock, flags); + if (!deleted_mr) { + mlx5_core_warn(dev, "failed radix tree delete of mr 0x%x\n", + mlx5_base_mkey(mr->key)); + return -ENOENT; + } + in.hdr.opcode = cpu_to_be16(MLX5_CMD_OP_DESTROY_MKEY); in.mkey = cpu_to_be32(mlx5_mkey_to_idx(mr->key)); err = mlx5_cmd_exec(dev, &in, sizeof(in), &out, sizeof(out)); @@ -119,10 +134,6 @@ int mlx5_core_destroy_mkey(struct mlx5_core_dev *dev, struct mlx5_core_mr *mr) if (out.hdr.status) return mlx5_cmd_status_to_err(&out.hdr); - write_lock_irqsave(&table->lock, flags); - radix_tree_delete(&table->tree, mlx5_base_mkey(mr->key)); - write_unlock_irqrestore(&table->lock, flags); - return err; } EXPORT_SYMBOL(mlx5_core_destroy_mkey); From a12f78c582b5a7a549fbee1a58d5778e651764ad Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Stefan=20S=C3=B8rensen?= Date: Wed, 25 Jun 2014 14:40:16 +0200 Subject: [PATCH 127/268] dp83640: Always decode received status frames MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Currently status frames are only handled when packet timestamping is enabled, but status frames are also needed for pin event timestamping. Fix by moving packet timestamping check to after status frame decode. Signed-off-by: Stefan Sørensen Acked-by: Richard Cochran Signed-off-by: David S. Miller --- drivers/net/phy/dp83640.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/net/phy/dp83640.c b/drivers/net/phy/dp83640.c index 6a999e6814a07..9408157a246c8 100644 --- a/drivers/net/phy/dp83640.c +++ b/drivers/net/phy/dp83640.c @@ -1323,15 +1323,15 @@ static bool dp83640_rxtstamp(struct phy_device *phydev, { struct dp83640_private *dp83640 = phydev->priv; - if (!dp83640->hwts_rx_en) - return false; - if (is_status_frame(skb, type)) { decode_status_frame(dp83640, skb); kfree_skb(skb); return true; } + if (!dp83640->hwts_rx_en) + return false; + SKB_PTP_TYPE(skb) = type; skb_queue_tail(&dp83640->rx_queue, skb); schedule_work(&dp83640->ts_work); From b4df480f68ae03b5dd4ab0db56536fbcec741705 Mon Sep 17 00:00:00 2001 From: Joonyoung Shim Date: Thu, 10 Jul 2014 11:49:42 +0900 Subject: [PATCH 128/268] usbnet: smsc95xx: add reset_resume function with reset operation The smsc95xx needs to resume with reset operation. Otherwise it causes system hang by network error like below after resume. This case appears on odroid u3 board. [ 9.727600] smsc95xx 1-2:1.0 eth0: kevent 2 may have been dropped [ 9.727648] smsc95xx 1-2:1.0 eth0: kevent 2 may have been dropped [ 9.727689] smsc95xx 1-2:1.0 eth0: kevent 2 may have been dropped [ 9.727728] smsc95xx 1-2:1.0 eth0: kevent 2 may have been dropped [ 9.729486] PM: resume of devices complete after 2011.219 msecs [ 10.117609] Restarting tasks ... done. [ 11.725099] smsc95xx 1-2:1.0 eth0: kevent 2 may have been dropped [ 13.480846] smsc95xx 1-2:1.0 eth0: kevent 2 may have been dropped [ 13.481361] smsc95xx 1-2:1.0 eth0: kevent 2 may have been dropped ... Signed-off-by: Joonyoung Shim Signed-off-by: David S. Miller --- drivers/net/usb/smsc95xx.c | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/drivers/net/usb/smsc95xx.c b/drivers/net/usb/smsc95xx.c index 424db65e43962..d07bf4cb893f8 100644 --- a/drivers/net/usb/smsc95xx.c +++ b/drivers/net/usb/smsc95xx.c @@ -1714,6 +1714,18 @@ static int smsc95xx_resume(struct usb_interface *intf) return ret; } +static int smsc95xx_reset_resume(struct usb_interface *intf) +{ + struct usbnet *dev = usb_get_intfdata(intf); + int ret; + + ret = smsc95xx_reset(dev); + if (ret < 0) + return ret; + + return smsc95xx_resume(intf); +} + static void smsc95xx_rx_csum_offload(struct sk_buff *skb) { skb->csum = *(u16 *)(skb_tail_pointer(skb) - 2); @@ -2004,7 +2016,7 @@ static struct usb_driver smsc95xx_driver = { .probe = usbnet_probe, .suspend = smsc95xx_suspend, .resume = smsc95xx_resume, - .reset_resume = smsc95xx_resume, + .reset_resume = smsc95xx_reset_resume, .disconnect = usbnet_disconnect, .disable_hub_initiated_lpm = 1, .supports_autosuspend = 1, From 948264879b6894dc389a44b99fae4f0b72932619 Mon Sep 17 00:00:00 2001 From: Todd Fujinaka Date: Thu, 10 Jul 2014 01:47:15 -0700 Subject: [PATCH 129/268] igb: Workaround for i210 Errata 25: Slow System Clock On some devices, the internal PLL circuit occasionally provides the wrong clock frequency after power up. The probability of failure is less than one failure per 1000 power cycles. When the failure occurs, the internal clock frequency is around 1/20 of the correct frequency. Cc: stable Signed-off-by: Todd Fujinaka Tested-by: Aaron Brown Signed-off-by: Jeff Kirsher Signed-off-by: David S. Miller --- drivers/net/ethernet/intel/igb/e1000_82575.c | 7 ++ .../net/ethernet/intel/igb/e1000_defines.h | 18 ++--- drivers/net/ethernet/intel/igb/e1000_hw.h | 3 + drivers/net/ethernet/intel/igb/e1000_i210.c | 66 +++++++++++++++++++ drivers/net/ethernet/intel/igb/e1000_i210.h | 12 ++++ drivers/net/ethernet/intel/igb/e1000_regs.h | 1 + drivers/net/ethernet/intel/igb/igb_main.c | 14 ++++ 7 files changed, 113 insertions(+), 8 deletions(-) diff --git a/drivers/net/ethernet/intel/igb/e1000_82575.c b/drivers/net/ethernet/intel/igb/e1000_82575.c index a2db388cc31e6..ee74f9536b31b 100644 --- a/drivers/net/ethernet/intel/igb/e1000_82575.c +++ b/drivers/net/ethernet/intel/igb/e1000_82575.c @@ -1481,6 +1481,13 @@ static s32 igb_init_hw_82575(struct e1000_hw *hw) s32 ret_val; u16 i, rar_count = mac->rar_entry_count; + if ((hw->mac.type >= e1000_i210) && + !(igb_get_flash_presence_i210(hw))) { + ret_val = igb_pll_workaround_i210(hw); + if (ret_val) + return ret_val; + } + /* Initialize identification LED */ ret_val = igb_id_led_init(hw); if (ret_val) { diff --git a/drivers/net/ethernet/intel/igb/e1000_defines.h b/drivers/net/ethernet/intel/igb/e1000_defines.h index 2a8bb35c2df2b..217f8138851bf 100644 --- a/drivers/net/ethernet/intel/igb/e1000_defines.h +++ b/drivers/net/ethernet/intel/igb/e1000_defines.h @@ -46,14 +46,15 @@ #define E1000_CTRL_EXT_SDP3_DIR 0x00000800 /* SDP3 Data direction */ /* Physical Func Reset Done Indication */ -#define E1000_CTRL_EXT_PFRSTD 0x00004000 -#define E1000_CTRL_EXT_LINK_MODE_MASK 0x00C00000 -#define E1000_CTRL_EXT_LINK_MODE_PCIE_SERDES 0x00C00000 -#define E1000_CTRL_EXT_LINK_MODE_1000BASE_KX 0x00400000 -#define E1000_CTRL_EXT_LINK_MODE_SGMII 0x00800000 -#define E1000_CTRL_EXT_LINK_MODE_GMII 0x00000000 -#define E1000_CTRL_EXT_EIAME 0x01000000 -#define E1000_CTRL_EXT_IRCA 0x00000001 +#define E1000_CTRL_EXT_PFRSTD 0x00004000 +#define E1000_CTRL_EXT_SDLPE 0X00040000 /* SerDes Low Power Enable */ +#define E1000_CTRL_EXT_LINK_MODE_MASK 0x00C00000 +#define E1000_CTRL_EXT_LINK_MODE_PCIE_SERDES 0x00C00000 +#define E1000_CTRL_EXT_LINK_MODE_1000BASE_KX 0x00400000 +#define E1000_CTRL_EXT_LINK_MODE_SGMII 0x00800000 +#define E1000_CTRL_EXT_LINK_MODE_GMII 0x00000000 +#define E1000_CTRL_EXT_EIAME 0x01000000 +#define E1000_CTRL_EXT_IRCA 0x00000001 /* Interrupt delay cancellation */ /* Driver loaded bit for FW */ #define E1000_CTRL_EXT_DRV_LOAD 0x10000000 @@ -62,6 +63,7 @@ /* packet buffer parity error detection enabled */ /* descriptor FIFO parity error detection enable */ #define E1000_CTRL_EXT_PBA_CLR 0x80000000 /* PBA Clear */ +#define E1000_CTRL_EXT_PHYPDEN 0x00100000 #define E1000_I2CCMD_REG_ADDR_SHIFT 16 #define E1000_I2CCMD_PHY_ADDR_SHIFT 24 #define E1000_I2CCMD_OPCODE_READ 0x08000000 diff --git a/drivers/net/ethernet/intel/igb/e1000_hw.h b/drivers/net/ethernet/intel/igb/e1000_hw.h index 89925e4058498..ce55ea5d750cd 100644 --- a/drivers/net/ethernet/intel/igb/e1000_hw.h +++ b/drivers/net/ethernet/intel/igb/e1000_hw.h @@ -567,4 +567,7 @@ struct net_device *igb_get_hw_dev(struct e1000_hw *hw); /* These functions must be implemented by drivers */ s32 igb_read_pcie_cap_reg(struct e1000_hw *hw, u32 reg, u16 *value); s32 igb_write_pcie_cap_reg(struct e1000_hw *hw, u32 reg, u16 *value); + +void igb_read_pci_cfg(struct e1000_hw *hw, u32 reg, u16 *value); +void igb_write_pci_cfg(struct e1000_hw *hw, u32 reg, u16 *value); #endif /* _E1000_HW_H_ */ diff --git a/drivers/net/ethernet/intel/igb/e1000_i210.c b/drivers/net/ethernet/intel/igb/e1000_i210.c index 337161f440dd6..65d931669f813 100644 --- a/drivers/net/ethernet/intel/igb/e1000_i210.c +++ b/drivers/net/ethernet/intel/igb/e1000_i210.c @@ -834,3 +834,69 @@ s32 igb_init_nvm_params_i210(struct e1000_hw *hw) } return ret_val; } + +/** + * igb_pll_workaround_i210 + * @hw: pointer to the HW structure + * + * Works around an errata in the PLL circuit where it occasionally + * provides the wrong clock frequency after power up. + **/ +s32 igb_pll_workaround_i210(struct e1000_hw *hw) +{ + s32 ret_val; + u32 wuc, mdicnfg, ctrl, ctrl_ext, reg_val; + u16 nvm_word, phy_word, pci_word, tmp_nvm; + int i; + + /* Get and set needed register values */ + wuc = rd32(E1000_WUC); + mdicnfg = rd32(E1000_MDICNFG); + reg_val = mdicnfg & ~E1000_MDICNFG_EXT_MDIO; + wr32(E1000_MDICNFG, reg_val); + + /* Get data from NVM, or set default */ + ret_val = igb_read_invm_word_i210(hw, E1000_INVM_AUTOLOAD, + &nvm_word); + if (ret_val) + nvm_word = E1000_INVM_DEFAULT_AL; + tmp_nvm = nvm_word | E1000_INVM_PLL_WO_VAL; + for (i = 0; i < E1000_MAX_PLL_TRIES; i++) { + /* check current state directly from internal PHY */ + igb_read_phy_reg_gs40g(hw, (E1000_PHY_PLL_FREQ_PAGE | + E1000_PHY_PLL_FREQ_REG), &phy_word); + if ((phy_word & E1000_PHY_PLL_UNCONF) + != E1000_PHY_PLL_UNCONF) { + ret_val = 0; + break; + } else { + ret_val = -E1000_ERR_PHY; + } + /* directly reset the internal PHY */ + ctrl = rd32(E1000_CTRL); + wr32(E1000_CTRL, ctrl|E1000_CTRL_PHY_RST); + + ctrl_ext = rd32(E1000_CTRL_EXT); + ctrl_ext |= (E1000_CTRL_EXT_PHYPDEN | E1000_CTRL_EXT_SDLPE); + wr32(E1000_CTRL_EXT, ctrl_ext); + + wr32(E1000_WUC, 0); + reg_val = (E1000_INVM_AUTOLOAD << 4) | (tmp_nvm << 16); + wr32(E1000_EEARBC_I210, reg_val); + + igb_read_pci_cfg(hw, E1000_PCI_PMCSR, &pci_word); + pci_word |= E1000_PCI_PMCSR_D3; + igb_write_pci_cfg(hw, E1000_PCI_PMCSR, &pci_word); + usleep_range(1000, 2000); + pci_word &= ~E1000_PCI_PMCSR_D3; + igb_write_pci_cfg(hw, E1000_PCI_PMCSR, &pci_word); + reg_val = (E1000_INVM_AUTOLOAD << 4) | (nvm_word << 16); + wr32(E1000_EEARBC_I210, reg_val); + + /* restore WUC register */ + wr32(E1000_WUC, wuc); + } + /* restore MDICNFG setting */ + wr32(E1000_MDICNFG, mdicnfg); + return ret_val; +} diff --git a/drivers/net/ethernet/intel/igb/e1000_i210.h b/drivers/net/ethernet/intel/igb/e1000_i210.h index 9f34976687bae..3442b6357d012 100644 --- a/drivers/net/ethernet/intel/igb/e1000_i210.h +++ b/drivers/net/ethernet/intel/igb/e1000_i210.h @@ -33,6 +33,7 @@ s32 igb_read_xmdio_reg(struct e1000_hw *hw, u16 addr, u8 dev_addr, u16 *data); s32 igb_write_xmdio_reg(struct e1000_hw *hw, u16 addr, u8 dev_addr, u16 data); s32 igb_init_nvm_params_i210(struct e1000_hw *hw); bool igb_get_flash_presence_i210(struct e1000_hw *hw); +s32 igb_pll_workaround_i210(struct e1000_hw *hw); #define E1000_STM_OPCODE 0xDB00 #define E1000_EEPROM_FLASH_SIZE_WORD 0x11 @@ -78,4 +79,15 @@ enum E1000_INVM_STRUCTURE_TYPE { #define NVM_LED_1_CFG_DEFAULT_I211 0x0184 #define NVM_LED_0_2_CFG_DEFAULT_I211 0x200C +/* PLL Defines */ +#define E1000_PCI_PMCSR 0x44 +#define E1000_PCI_PMCSR_D3 0x03 +#define E1000_MAX_PLL_TRIES 5 +#define E1000_PHY_PLL_UNCONF 0xFF +#define E1000_PHY_PLL_FREQ_PAGE 0xFC0000 +#define E1000_PHY_PLL_FREQ_REG 0x000E +#define E1000_INVM_DEFAULT_AL 0x202F +#define E1000_INVM_AUTOLOAD 0x0A +#define E1000_INVM_PLL_WO_VAL 0x0010 + #endif diff --git a/drivers/net/ethernet/intel/igb/e1000_regs.h b/drivers/net/ethernet/intel/igb/e1000_regs.h index 1cc4b1a7e597d..f5ba4e4eafb9c 100644 --- a/drivers/net/ethernet/intel/igb/e1000_regs.h +++ b/drivers/net/ethernet/intel/igb/e1000_regs.h @@ -66,6 +66,7 @@ #define E1000_PBA 0x01000 /* Packet Buffer Allocation - RW */ #define E1000_PBS 0x01008 /* Packet Buffer Size */ #define E1000_EEMNGCTL 0x01010 /* MNG EEprom Control */ +#define E1000_EEARBC_I210 0x12024 /* EEPROM Auto Read Bus Control */ #define E1000_EEWR 0x0102C /* EEPROM Write Register - RW */ #define E1000_I2CCMD 0x01028 /* SFPI2C Command Register - RW */ #define E1000_FRTIMER 0x01048 /* Free Running Timer - RW */ diff --git a/drivers/net/ethernet/intel/igb/igb_main.c b/drivers/net/ethernet/intel/igb/igb_main.c index f145adbb55ac0..57a96e00df8cc 100644 --- a/drivers/net/ethernet/intel/igb/igb_main.c +++ b/drivers/net/ethernet/intel/igb/igb_main.c @@ -7215,6 +7215,20 @@ static int igb_ioctl(struct net_device *netdev, struct ifreq *ifr, int cmd) } } +void igb_read_pci_cfg(struct e1000_hw *hw, u32 reg, u16 *value) +{ + struct igb_adapter *adapter = hw->back; + + pci_read_config_word(adapter->pdev, reg, value); +} + +void igb_write_pci_cfg(struct e1000_hw *hw, u32 reg, u16 *value) +{ + struct igb_adapter *adapter = hw->back; + + pci_write_config_word(adapter->pdev, reg, *value); +} + s32 igb_read_pcie_cap_reg(struct e1000_hw *hw, u32 reg, u16 *value) { struct igb_adapter *adapter = hw->back; From 4237ba43b65aa989674c89fc4f2fe46eebc501ee Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Thu, 10 Jul 2014 10:50:19 +0200 Subject: [PATCH 130/268] fuse: restructure ->rename2() Make ->rename2() universal, i.e. able to handle zero flags. This is to make future change of the API easier. Signed-off-by: Miklos Szeredi --- fs/fuse/dir.c | 34 ++++++++++++++++++++-------------- 1 file changed, 20 insertions(+), 14 deletions(-) diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c index 202a9721be931..0c6048247a34e 100644 --- a/fs/fuse/dir.c +++ b/fs/fuse/dir.c @@ -815,13 +815,6 @@ static int fuse_rename_common(struct inode *olddir, struct dentry *oldent, return err; } -static int fuse_rename(struct inode *olddir, struct dentry *oldent, - struct inode *newdir, struct dentry *newent) -{ - return fuse_rename_common(olddir, oldent, newdir, newent, 0, - FUSE_RENAME, sizeof(struct fuse_rename_in)); -} - static int fuse_rename2(struct inode *olddir, struct dentry *oldent, struct inode *newdir, struct dentry *newent, unsigned int flags) @@ -832,17 +825,30 @@ static int fuse_rename2(struct inode *olddir, struct dentry *oldent, if (flags & ~(RENAME_NOREPLACE | RENAME_EXCHANGE)) return -EINVAL; - if (fc->no_rename2 || fc->minor < 23) - return -EINVAL; + if (flags) { + if (fc->no_rename2 || fc->minor < 23) + return -EINVAL; - err = fuse_rename_common(olddir, oldent, newdir, newent, flags, - FUSE_RENAME2, sizeof(struct fuse_rename2_in)); - if (err == -ENOSYS) { - fc->no_rename2 = 1; - err = -EINVAL; + err = fuse_rename_common(olddir, oldent, newdir, newent, flags, + FUSE_RENAME2, + sizeof(struct fuse_rename2_in)); + if (err == -ENOSYS) { + fc->no_rename2 = 1; + err = -EINVAL; + } + } else { + err = fuse_rename_common(olddir, oldent, newdir, newent, 0, + FUSE_RENAME, + sizeof(struct fuse_rename_in)); } + return err; +} +static int fuse_rename(struct inode *olddir, struct dentry *oldent, + struct inode *newdir, struct dentry *newent) +{ + return fuse_rename2(olddir, oldent, newdir, newent, 0); } static int fuse_link(struct dentry *entry, struct inode *newdir, From bbc1c5e8ad6dfebf9d13b8a4ccdf66c92913eac9 Mon Sep 17 00:00:00 2001 From: Lars Ellenberg Date: Wed, 9 Jul 2014 21:18:32 +0200 Subject: [PATCH 131/268] drbd: fix regression 'out of mem, failed to invoke fence-peer helper' Since linux kernel 3.13, kthread_run() internally uses wait_for_completion_killable(). We sometimes may use kthread_run() while we still have a signal pending, which we used to kick our threads out of potentially blocking network functions, causing kthread_run() to mistake that as a new fatal signal and fail. Fix: flush_signals() before kthread_run(). Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg Signed-off-by: Jens Axboe --- drivers/block/drbd/drbd_nl.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c index 1b35c45c92b75..3f2e167380805 100644 --- a/drivers/block/drbd/drbd_nl.c +++ b/drivers/block/drbd/drbd_nl.c @@ -544,6 +544,12 @@ void conn_try_outdate_peer_async(struct drbd_connection *connection) struct task_struct *opa; kref_get(&connection->kref); + /* We may just have force_sig()'ed this thread + * to get it out of some blocking network function. + * Clear signals; otherwise kthread_run(), which internally uses + * wait_on_completion_killable(), will mistake our pending signal + * for a new fatal signal and fail. */ + flush_signals(current); opa = kthread_run(_try_outdate_peer_async, connection, "drbd_async_h"); if (IS_ERR(opa)) { drbd_err(connection, "out of mem, failed to invoke fence-peer helper\n"); From 29e2435fd6d71e0136e2c2ff0433b7dbeeaaccfd Mon Sep 17 00:00:00 2001 From: Catalin Marinas Date: Tue, 8 Jul 2014 16:54:18 +0100 Subject: [PATCH 132/268] efi: fdt: Do not report an error during boot if UEFI is not available Currently, fdt_find_uefi_params() reports an error if no EFI parameters are found in the DT. This is however a valid case for non-UEFI kernel booting. This patch checks changes the error reporting to a pr_info("UEFI not found") when no EFI parameters are found in the DT. Signed-off-by: Catalin Marinas Acked-by: Leif Lindholm Tested-by: Leif Lindholm Signed-off-by: Matt Fleming --- drivers/firmware/efi/efi.c | 22 +++++++++++++++------- 1 file changed, 15 insertions(+), 7 deletions(-) diff --git a/drivers/firmware/efi/efi.c b/drivers/firmware/efi/efi.c index eff1a2f22f09b..dc79346689e60 100644 --- a/drivers/firmware/efi/efi.c +++ b/drivers/firmware/efi/efi.c @@ -346,6 +346,7 @@ static __initdata struct { struct param_info { int verbose; + int found; void *params; }; @@ -362,16 +363,12 @@ static int __init fdt_find_uefi_params(unsigned long node, const char *uname, (strcmp(uname, "chosen") != 0 && strcmp(uname, "chosen@0") != 0)) return 0; - pr_info("Getting parameters from FDT:\n"); - for (i = 0; i < ARRAY_SIZE(dt_params); i++) { prop = of_get_flat_dt_prop(node, dt_params[i].propname, &len); - if (!prop) { - pr_err("Can't find %s in device tree!\n", - dt_params[i].name); + if (!prop) return 0; - } dest = info->params + dt_params[i].offset; + info->found++; val = of_read_number(prop, len / sizeof(u32)); @@ -390,10 +387,21 @@ static int __init fdt_find_uefi_params(unsigned long node, const char *uname, int __init efi_get_fdt_params(struct efi_fdt_params *params, int verbose) { struct param_info info; + int ret; + + pr_info("Getting EFI parameters from FDT:\n"); info.verbose = verbose; + info.found = 0; info.params = params; - return of_scan_flat_dt(fdt_find_uefi_params, &info); + ret = of_scan_flat_dt(fdt_find_uefi_params, &info); + if (!info.found) + pr_info("UEFI not found.\n"); + else if (!ret) + pr_err("Can't find '%s' in device tree!\n", + dt_params[info.found].name); + + return ret; } #endif /* CONFIG_EFI_PARAMS_FROM_FDT */ From c7fb93ec51d462ec3540a729ba446663c26a0505 Mon Sep 17 00:00:00 2001 From: Michael Brown Date: Thu, 10 Jul 2014 12:26:20 +0100 Subject: [PATCH 133/268] x86/efi: Include a .bss section within the PE/COFF headers MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The PE/COFF headers currently describe only the initialised-data portions of the image, and result in no space being allocated for the uninitialised-data portions. Consequently, the EFI boot stub will end up overwriting unexpected areas of memory, with unpredictable results. Fix by including a .bss section in the PE/COFF headers (functionally equivalent to the init_size field in the bzImage header). Signed-off-by: Michael Brown Cc: Thomas Bächler Cc: Josh Boyer Cc: Signed-off-by: Matt Fleming --- arch/x86/boot/header.S | 26 +++++++++++++++++++++---- arch/x86/boot/tools/build.c | 38 +++++++++++++++++++++++++++++-------- 2 files changed, 52 insertions(+), 12 deletions(-) diff --git a/arch/x86/boot/header.S b/arch/x86/boot/header.S index 84c223479e3c9..7a6d43a554d77 100644 --- a/arch/x86/boot/header.S +++ b/arch/x86/boot/header.S @@ -91,10 +91,9 @@ bs_die: .section ".bsdata", "a" bugger_off_msg: - .ascii "Direct floppy boot is not supported. " - .ascii "Use a boot loader program instead.\r\n" + .ascii "Use a boot loader.\r\n" .ascii "\n" - .ascii "Remove disk and press any key to reboot ...\r\n" + .ascii "Remove disk and press any key to reboot...\r\n" .byte 0 #ifdef CONFIG_EFI_STUB @@ -108,7 +107,7 @@ coff_header: #else .word 0x8664 # x86-64 #endif - .word 3 # nr_sections + .word 4 # nr_sections .long 0 # TimeDateStamp .long 0 # PointerToSymbolTable .long 1 # NumberOfSymbols @@ -250,6 +249,25 @@ section_table: .word 0 # NumberOfLineNumbers .long 0x60500020 # Characteristics (section flags) + # + # The offset & size fields are filled in by build.c. + # + .ascii ".bss" + .byte 0 + .byte 0 + .byte 0 + .byte 0 + .long 0 + .long 0x0 + .long 0 # Size of initialized data + # on disk + .long 0x0 + .long 0 # PointerToRelocations + .long 0 # PointerToLineNumbers + .word 0 # NumberOfRelocations + .word 0 # NumberOfLineNumbers + .long 0xc8000080 # Characteristics (section flags) + #endif /* CONFIG_EFI_STUB */ # Kernel attributes; used by setup. This is part 1 of the diff --git a/arch/x86/boot/tools/build.c b/arch/x86/boot/tools/build.c index 1a2f2121cada2..a7661c430cd98 100644 --- a/arch/x86/boot/tools/build.c +++ b/arch/x86/boot/tools/build.c @@ -143,7 +143,7 @@ static void usage(void) #ifdef CONFIG_EFI_STUB -static void update_pecoff_section_header(char *section_name, u32 offset, u32 size) +static void update_pecoff_section_header_fields(char *section_name, u32 vma, u32 size, u32 datasz, u32 offset) { unsigned int pe_header; unsigned short num_sections; @@ -164,10 +164,10 @@ static void update_pecoff_section_header(char *section_name, u32 offset, u32 siz put_unaligned_le32(size, section + 0x8); /* section header vma field */ - put_unaligned_le32(offset, section + 0xc); + put_unaligned_le32(vma, section + 0xc); /* section header 'size of initialised data' field */ - put_unaligned_le32(size, section + 0x10); + put_unaligned_le32(datasz, section + 0x10); /* section header 'file offset' field */ put_unaligned_le32(offset, section + 0x14); @@ -179,6 +179,11 @@ static void update_pecoff_section_header(char *section_name, u32 offset, u32 siz } } +static void update_pecoff_section_header(char *section_name, u32 offset, u32 size) +{ + update_pecoff_section_header_fields(section_name, offset, size, size, offset); +} + static void update_pecoff_setup_and_reloc(unsigned int size) { u32 setup_offset = 0x200; @@ -203,9 +208,6 @@ static void update_pecoff_text(unsigned int text_start, unsigned int file_sz) pe_header = get_unaligned_le32(&buf[0x3c]); - /* Size of image */ - put_unaligned_le32(file_sz, &buf[pe_header + 0x50]); - /* * Size of code: Subtract the size of the first sector (512 bytes) * which includes the header. @@ -220,6 +222,22 @@ static void update_pecoff_text(unsigned int text_start, unsigned int file_sz) update_pecoff_section_header(".text", text_start, text_sz); } +static void update_pecoff_bss(unsigned int file_sz, unsigned int init_sz) +{ + unsigned int pe_header; + unsigned int bss_sz = init_sz - file_sz; + + pe_header = get_unaligned_le32(&buf[0x3c]); + + /* Size of uninitialized data */ + put_unaligned_le32(bss_sz, &buf[pe_header + 0x24]); + + /* Size of image */ + put_unaligned_le32(init_sz, &buf[pe_header + 0x50]); + + update_pecoff_section_header_fields(".bss", file_sz, bss_sz, 0, 0); +} + static int reserve_pecoff_reloc_section(int c) { /* Reserve 0x20 bytes for .reloc section */ @@ -259,6 +277,8 @@ static void efi_stub_entry_update(void) static inline void update_pecoff_setup_and_reloc(unsigned int size) {} static inline void update_pecoff_text(unsigned int text_start, unsigned int file_sz) {} +static inline void update_pecoff_bss(unsigned int file_sz, + unsigned int init_sz) {} static inline void efi_stub_defaults(void) {} static inline void efi_stub_entry_update(void) {} @@ -310,7 +330,7 @@ static void parse_zoffset(char *fname) int main(int argc, char ** argv) { - unsigned int i, sz, setup_sectors; + unsigned int i, sz, setup_sectors, init_sz; int c; u32 sys_size; struct stat sb; @@ -376,7 +396,9 @@ int main(int argc, char ** argv) buf[0x1f1] = setup_sectors-1; put_unaligned_le32(sys_size, &buf[0x1f4]); - update_pecoff_text(setup_sectors * 512, sz + i + ((sys_size * 16) - sz)); + update_pecoff_text(setup_sectors * 512, i + (sys_size * 16)); + init_sz = get_unaligned_le32(&buf[0x260]); + update_pecoff_bss(i + (sys_size * 16), init_sz); efi_stub_entry_update(); From 76252723e88681628a3dbb9c09c963e095476f73 Mon Sep 17 00:00:00 2001 From: Stefan Assmann Date: Thu, 10 Jul 2014 03:29:39 -0700 Subject: [PATCH 134/268] igb: do a reset on SR-IOV re-init if device is down To properly re-initialize SR-IOV it is necessary to reset the device even if it is already down. Not doing this may result in Tx unit hangs. Cc: stable Signed-off-by: Stefan Assmann Tested-by: Aaron Brown Signed-off-by: Jeff Kirsher Signed-off-by: David S. Miller --- drivers/net/ethernet/intel/igb/igb_main.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/ethernet/intel/igb/igb_main.c b/drivers/net/ethernet/intel/igb/igb_main.c index 57a96e00df8cc..a9537ba7a5a07 100644 --- a/drivers/net/ethernet/intel/igb/igb_main.c +++ b/drivers/net/ethernet/intel/igb/igb_main.c @@ -7592,6 +7592,8 @@ static int igb_sriov_reinit(struct pci_dev *dev) if (netif_running(netdev)) igb_close(netdev); + else + igb_reset(adapter); igb_clear_interrupt_scheme(adapter); From 78b3321610bf920d7fceb1a0236faa881be0bcf3 Mon Sep 17 00:00:00 2001 From: Srinivas Pandruvada Date: Thu, 7 Aug 2014 22:03:00 +0100 Subject: [PATCH 135/268] iio:core: Handle error when mask type is not separate When event spec is shared by multiple channels, which has definition for mask_shared_by_type, iio_device_register_eventset fails. For example: static const struct iio_event_spec iio_dummy_events[] = { { .type = IIO_EV_TYPE_THRESH, .dir = IIO_EV_DIR_RISING, .mask_separate = BIT(IIO_EV_INFO_ENABLE), .mask_shared_by_type = BIT(IIO_EV_INFO_VALUE), }, { .type = IIO_EV_TYPE_THRESH, .dir = IIO_EV_DIR_FALLING, .mask_separate = BIT(IIO_EV_INFO_ENABLE),a .mask_shared_by_type = BIT(IIO_EV_INFO_VALUE), } }; If two channels use this event spec, this will result in error. This change handles EBUSY error similar to iio_device_add_info_mask_type(). Signed-off-by: Srinivas Pandruvada Signed-off-by: Jonathan Cameron Cc: Stable@vger.kernel.org --- drivers/iio/industrialio-event.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/iio/industrialio-event.c b/drivers/iio/industrialio-event.c index 258a973a1fb8d..bfbf4d419f41c 100644 --- a/drivers/iio/industrialio-event.c +++ b/drivers/iio/industrialio-event.c @@ -345,6 +345,9 @@ static int iio_device_add_event(struct iio_dev *indio_dev, &indio_dev->event_interface->dev_attr_list); kfree(postfix); + if ((ret == -EBUSY) && (shared_by != IIO_SEPARATE)) + continue; + if (ret) return ret; From 19eeb2f9e750f47c805f66e3b0e889b12557d80f Mon Sep 17 00:00:00 2001 From: Alexey Khoroshilov Date: Thu, 10 Jul 2014 18:43:01 -0400 Subject: [PATCH 136/268] farsync: fix invalid memory accesses in fst_add_one() and fst_init_card() There are several issues in fst_add_one() and fst_init_card(): - invalid pointer dereference at card->ports[card->nports - 1] if register_hdlc_device() fails for the first port in fst_init_card(); - fst_card_array overflow at fst_card_array[no_of_cards_added] because there is no checks for array overflow; - use after free because pointer to deallocated card is left in fst_card_array if something fails after fst_card_array[no_of_cards_added] = card; - several leaks on failure paths in fst_add_one(). The patch fixes all the issues and makes code more readable. Found by Linux Driver Verification project (linuxtesting.org). Signed-off-by: Alexey Khoroshilov Signed-off-by: David S. Miller --- drivers/net/wan/farsync.c | 112 ++++++++++++++++++++------------------ 1 file changed, 58 insertions(+), 54 deletions(-) diff --git a/drivers/net/wan/farsync.c b/drivers/net/wan/farsync.c index 93ace042d0aa7..1f041271f7fec 100644 --- a/drivers/net/wan/farsync.c +++ b/drivers/net/wan/farsync.c @@ -2363,7 +2363,7 @@ static char *type_strings[] = { "FarSync TE1" }; -static void +static int fst_init_card(struct fst_card_info *card) { int i; @@ -2374,24 +2374,21 @@ fst_init_card(struct fst_card_info *card) * we'll have to revise it in some way then. */ for (i = 0; i < card->nports; i++) { - err = register_hdlc_device(card->ports[i].dev); - if (err < 0) { - int j; + err = register_hdlc_device(card->ports[i].dev); + if (err < 0) { pr_err("Cannot register HDLC device for port %d (errno %d)\n", - i, -err); - for (j = i; j < card->nports; j++) { - free_netdev(card->ports[j].dev); - card->ports[j].dev = NULL; - } - card->nports = i; - break; - } + i, -err); + while (i--) + unregister_hdlc_device(card->ports[i].dev); + return err; + } } pr_info("%s-%s: %s IRQ%d, %d ports\n", port_to_dev(&card->ports[0])->name, port_to_dev(&card->ports[card->nports - 1])->name, type_strings[card->type], card->irq, card->nports); + return 0; } static const struct net_device_ops fst_ops = { @@ -2447,15 +2444,12 @@ fst_add_one(struct pci_dev *pdev, const struct pci_device_id *ent) /* Try to enable the device */ if ((err = pci_enable_device(pdev)) != 0) { pr_err("Failed to enable card. Err %d\n", -err); - kfree(card); - return err; + goto enable_fail; } if ((err = pci_request_regions(pdev, "FarSync")) !=0) { pr_err("Failed to allocate regions. Err %d\n", -err); - pci_disable_device(pdev); - kfree(card); - return err; + goto regions_fail; } /* Get virtual addresses of memory regions */ @@ -2464,30 +2458,21 @@ fst_add_one(struct pci_dev *pdev, const struct pci_device_id *ent) card->phys_ctlmem = pci_resource_start(pdev, 3); if ((card->mem = ioremap(card->phys_mem, FST_MEMSIZE)) == NULL) { pr_err("Physical memory remap failed\n"); - pci_release_regions(pdev); - pci_disable_device(pdev); - kfree(card); - return -ENODEV; + err = -ENODEV; + goto ioremap_physmem_fail; } if ((card->ctlmem = ioremap(card->phys_ctlmem, 0x10)) == NULL) { pr_err("Control memory remap failed\n"); - pci_release_regions(pdev); - pci_disable_device(pdev); - iounmap(card->mem); - kfree(card); - return -ENODEV; + err = -ENODEV; + goto ioremap_ctlmem_fail; } dbg(DBG_PCI, "kernel mem %p, ctlmem %p\n", card->mem, card->ctlmem); /* Register the interrupt handler */ if (request_irq(pdev->irq, fst_intr, IRQF_SHARED, FST_DEV_NAME, card)) { pr_err("Unable to register interrupt %d\n", card->irq); - pci_release_regions(pdev); - pci_disable_device(pdev); - iounmap(card->ctlmem); - iounmap(card->mem); - kfree(card); - return -ENODEV; + err = -ENODEV; + goto irq_fail; } /* Record info we need */ @@ -2513,13 +2498,8 @@ fst_add_one(struct pci_dev *pdev, const struct pci_device_id *ent) while (i--) free_netdev(card->ports[i].dev); pr_err("FarSync: out of memory\n"); - free_irq(card->irq, card); - pci_release_regions(pdev); - pci_disable_device(pdev); - iounmap(card->ctlmem); - iounmap(card->mem); - kfree(card); - return -ENODEV; + err = -ENOMEM; + goto hdlcdev_fail; } card->ports[i].dev = dev; card->ports[i].card = card; @@ -2565,9 +2545,16 @@ fst_add_one(struct pci_dev *pdev, const struct pci_device_id *ent) pci_set_drvdata(pdev, card); /* Remainder of card setup */ + if (no_of_cards_added >= FST_MAX_CARDS) { + pr_err("FarSync: too many cards\n"); + err = -ENOMEM; + goto card_array_fail; + } fst_card_array[no_of_cards_added] = card; card->card_no = no_of_cards_added++; /* Record instance and bump it */ - fst_init_card(card); + err = fst_init_card(card); + if (err) + goto init_card_fail; if (card->family == FST_FAMILY_TXU) { /* * Allocate a dma buffer for transmit and receives @@ -2577,29 +2564,46 @@ fst_add_one(struct pci_dev *pdev, const struct pci_device_id *ent) &card->rx_dma_handle_card); if (card->rx_dma_handle_host == NULL) { pr_err("Could not allocate rx dma buffer\n"); - fst_disable_intr(card); - pci_release_regions(pdev); - pci_disable_device(pdev); - iounmap(card->ctlmem); - iounmap(card->mem); - kfree(card); - return -ENOMEM; + err = -ENOMEM; + goto rx_dma_fail; } card->tx_dma_handle_host = pci_alloc_consistent(card->device, FST_MAX_MTU, &card->tx_dma_handle_card); if (card->tx_dma_handle_host == NULL) { pr_err("Could not allocate tx dma buffer\n"); - fst_disable_intr(card); - pci_release_regions(pdev); - pci_disable_device(pdev); - iounmap(card->ctlmem); - iounmap(card->mem); - kfree(card); - return -ENOMEM; + err = -ENOMEM; + goto tx_dma_fail; } } return 0; /* Success */ + +tx_dma_fail: + pci_free_consistent(card->device, FST_MAX_MTU, + card->rx_dma_handle_host, + card->rx_dma_handle_card); +rx_dma_fail: + fst_disable_intr(card); + for (i = 0 ; i < card->nports ; i++) + unregister_hdlc_device(card->ports[i].dev); +init_card_fail: + fst_card_array[card->card_no] = NULL; +card_array_fail: + for (i = 0 ; i < card->nports ; i++) + free_netdev(card->ports[i].dev); +hdlcdev_fail: + free_irq(card->irq, card); +irq_fail: + iounmap(card->ctlmem); +ioremap_ctlmem_fail: + iounmap(card->mem); +ioremap_physmem_fail: + pci_release_regions(pdev); +regions_fail: + pci_disable_device(pdev); +enable_fail: + kfree(card); + return err; } /* From d0a7ebbc119738439ff00f7fadbd343ae20ea5e8 Mon Sep 17 00:00:00 2001 From: Amritha Nambiar Date: Thu, 10 Jul 2014 17:29:21 -0700 Subject: [PATCH 137/268] GRE: enable offloads for GRE To get offloads to work with Generic Routing Encapsulation (GRE), the outer transport header has to be reset after skb_push is done. This patch has the support for this fix and hence GRE offloading. Signed-off-by: Amritha Nambiar Signed-off-by: Joseph Gasparakis Tested-By: Jim Young Signed-off-by: Jeff Kirsher Signed-off-by: David S. Miller --- net/ipv4/gre_demux.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/ipv4/gre_demux.c b/net/ipv4/gre_demux.c index 4e9619bca732b..0485bf7f8f030 100644 --- a/net/ipv4/gre_demux.c +++ b/net/ipv4/gre_demux.c @@ -68,6 +68,7 @@ void gre_build_header(struct sk_buff *skb, const struct tnl_ptk_info *tpi, skb_push(skb, hdr_len); + skb_reset_transport_header(skb); greh = (struct gre_base_hdr *)skb->data; greh->flags = tnl_flags_to_gre_flags(tpi->flags); greh->protocol = tpi->proto; From 4cad9f3b61c7268fa89ab8096e23202300399b5d Mon Sep 17 00:00:00 2001 From: Suresh Reddy Date: Fri, 11 Jul 2014 14:03:01 +0530 Subject: [PATCH 138/268] be2net: set EQ DB clear-intr bit in be_open() On BE3, if the clear-interrupt bit of the EQ doorbell is not set the first time it is armed, ocassionally we have observed that the EQ doesn't raise anymore interrupts even if it is in armed state. This patch fixes this by setting the clear-interrupt bit when EQs are armed for the first time in be_open(). Signed-off-by: Suresh Reddy Signed-off-by: Sathya Perla Signed-off-by: David S. Miller --- drivers/net/ethernet/emulex/benet/be_main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/emulex/benet/be_main.c b/drivers/net/ethernet/emulex/benet/be_main.c index 34a26e42f19d3..1e187fb760f80 100644 --- a/drivers/net/ethernet/emulex/benet/be_main.c +++ b/drivers/net/ethernet/emulex/benet/be_main.c @@ -2902,7 +2902,7 @@ static int be_open(struct net_device *netdev) for_all_evt_queues(adapter, eqo, i) { napi_enable(&eqo->napi); be_enable_busy_poll(eqo); - be_eq_notify(adapter, eqo->q.id, true, false, 0); + be_eq_notify(adapter, eqo->q.id, true, true, 0); } adapter->flags |= BE_FLAGS_NAPI_ENABLED; From a91d45f1a343188793d6f2bdf1a72c64015a8255 Mon Sep 17 00:00:00 2001 From: hayeswang Date: Fri, 11 Jul 2014 16:48:27 +0800 Subject: [PATCH 139/268] r8152: fix r8152_csum_workaround function The transport offset of the IPv4 packet should be fixed and wouldn't be out of the hw limitation, so the r8152_csum_workaround() should be used for IPv6 packets. Signed-off-by: Hayes Wang Signed-off-by: David S. Miller --- drivers/net/usb/r8152.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/usb/r8152.c b/drivers/net/usb/r8152.c index a795ecf8d5b63..7bad2d316637a 100644 --- a/drivers/net/usb/r8152.c +++ b/drivers/net/usb/r8152.c @@ -1359,7 +1359,7 @@ static void r8152_csum_workaround(struct r8152 *tp, struct sk_buff *skb, struct sk_buff_head seg_list; struct sk_buff *segs, *nskb; - features &= ~(NETIF_F_IP_CSUM | NETIF_F_SG | NETIF_F_TSO); + features &= ~(NETIF_F_SG | NETIF_F_IPV6_CSUM | NETIF_F_TSO6); segs = skb_gso_segment(skb, features); if (IS_ERR(segs) || !segs) goto drop; From 999417549c16dd0e3a382aa9f6ae61688db03181 Mon Sep 17 00:00:00 2001 From: Jon Paul Maloy Date: Fri, 11 Jul 2014 08:45:27 -0400 Subject: [PATCH 140/268] tipc: clear 'next'-pointer of message fragments before reassembly If the 'next' pointer of the last fragment buffer in a message is not zeroed before reassembly, we risk ending up with a corrupt message, since the reassembly function itself isn't doing this. Currently, when a buffer is retrieved from the deferred queue of the broadcast link, the next pointer is not cleared, with the result as described above. This commit corrects this, and thereby fixes a bug that may occur when long broadcast messages are transmitted across dual interfaces. The bug has been present since 40ba3cdf542a469aaa9083fa041656e59b109b90 ("tipc: message reassembly using fragment chain") This commit should be applied to both net and net-next. Signed-off-by: Jon Maloy Signed-off-by: David S. Miller --- net/tipc/bcast.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/tipc/bcast.c b/net/tipc/bcast.c index 26631679a1faa..55c6c9d3e1cee 100644 --- a/net/tipc/bcast.c +++ b/net/tipc/bcast.c @@ -559,6 +559,7 @@ void tipc_bclink_rcv(struct sk_buff *buf) buf = node->bclink.deferred_head; node->bclink.deferred_head = buf->next; + buf->next = NULL; node->bclink.deferred_size--; goto receive; } From 1d9dbf154295aa33819b08340464ff15495e83d8 Mon Sep 17 00:00:00 2001 From: Darren Hart Date: Fri, 11 Jul 2014 11:03:39 +0000 Subject: [PATCH 141/268] ACPI / documentation: Remove reference to acpi_platform_device_ids from enumeration.txt As of: 4845934 ACPI / scan: use platform bus type by default for _HID enumeration ACPI uses the platform bus by default, changing the opt-in to an opt-out policy, eliminating the acpi_platform_device_ids table and replacing it with forbidden_id_list[]. Remove the qualifying paragraph from the acpi/enumeration documentation as it no longer applies. Reported-by: Max Eliaser Signed-off-by: Darren Hart Signed-off-by: Rafael J. Wysocki --- Documentation/acpi/enumeration.txt | 6 ------ 1 file changed, 6 deletions(-) diff --git a/Documentation/acpi/enumeration.txt b/Documentation/acpi/enumeration.txt index fd786ea13a1ff..e182be5e3c83c 100644 --- a/Documentation/acpi/enumeration.txt +++ b/Documentation/acpi/enumeration.txt @@ -60,12 +60,6 @@ If the driver needs to perform more complex initialization like getting and configuring GPIOs it can get its ACPI handle and extract this information from ACPI tables. -Currently the kernel is not able to automatically determine from which ACPI -device it should make the corresponding platform device so we need to add -the ACPI device explicitly to acpi_platform_device_ids list defined in -drivers/acpi/acpi_platform.c. This limitation is only for the platform -devices, SPI and I2C devices are created automatically as described below. - DMA support ~~~~~~~~~~~ DMA controllers enumerated via ACPI should be registered in the system to From aff008ad813c7cf3cfe7b532e7ba2c526c136f22 Mon Sep 17 00:00:00 2001 From: Guenter Roeck Date: Tue, 17 Jun 2014 15:51:02 -0700 Subject: [PATCH 142/268] platform_get_irq: Revert to platform_get_resource if of_irq_get fails Commits 9ec36ca (of/irq: do irq resolution in platform_get_irq) and ad69674 (of/irq: do irq resolution in platform_get_irq_byname) change the semantics of platform_get_irq and platform_get_irq_byname to always rely on devicetree information if devicetree is enabled and if a devicetree node is attached to the device. The functions now return an error if the devicetree data does not include interrupt information, even if the information is available as platform resource data. This causes mfd client drivers to fail if the interrupt number is passed via platform resources. Therefore, if of_irq_get fails, try platform_get_resource as method of last resort. This restores the original functionality for drivers depending on platform resources to get irq information. Cc: Russell King Cc: Tony Lindgren Cc: Grant Likely Cc: Grygorii Strashko Signed-off-by: Guenter Roeck Acked-by: Rob Herring Cc: stable Signed-off-by: Greg Kroah-Hartman --- drivers/base/platform.c | 18 ++++++++++++++---- 1 file changed, 14 insertions(+), 4 deletions(-) diff --git a/drivers/base/platform.c b/drivers/base/platform.c index 9e9227e1762d4..eee48c49f5def 100644 --- a/drivers/base/platform.c +++ b/drivers/base/platform.c @@ -89,8 +89,13 @@ int platform_get_irq(struct platform_device *dev, unsigned int num) return dev->archdata.irqs[num]; #else struct resource *r; - if (IS_ENABLED(CONFIG_OF_IRQ) && dev->dev.of_node) - return of_irq_get(dev->dev.of_node, num); + if (IS_ENABLED(CONFIG_OF_IRQ) && dev->dev.of_node) { + int ret; + + ret = of_irq_get(dev->dev.of_node, num); + if (ret >= 0 || ret == -EPROBE_DEFER) + return ret; + } r = platform_get_resource(dev, IORESOURCE_IRQ, num); @@ -133,8 +138,13 @@ int platform_get_irq_byname(struct platform_device *dev, const char *name) { struct resource *r; - if (IS_ENABLED(CONFIG_OF_IRQ) && dev->dev.of_node) - return of_irq_get_byname(dev->dev.of_node, name); + if (IS_ENABLED(CONFIG_OF_IRQ) && dev->dev.of_node) { + int ret; + + ret = of_irq_get_byname(dev->dev.of_node, name); + if (ret >= 0 || ret == -EPROBE_DEFER) + return ret; + } r = platform_get_resource_byname(dev, IORESOURCE_IRQ, name); return r ? r->start : -ENXIO; From 71702e6e52c8312f4c6797a9787d0f8b5656156f Mon Sep 17 00:00:00 2001 From: Martin Fuzzey Date: Fri, 7 Nov 2014 13:54:00 +0000 Subject: [PATCH 143/268] iio: mma8452: Use correct acceleration units. The userspace interface for acceleration sensors is documented as using m/s^2 units [Documentation/ABI/testing/sysfs-bus-iio] The fullscale raw value for the mma8452 (-2048) corresponds to -2G, -4G or -8G depending on the seleted mode. The scale table was converting to G rather than m/s^2. Change the scaling table to match the documented interface. Signed-off-by: Martin Fuzzey Signed-off-by: Jonathan Cameron Cc: stable@vger.kernel.org --- drivers/iio/accel/mma8452.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/drivers/iio/accel/mma8452.c b/drivers/iio/accel/mma8452.c index 17aeea1705665..2a5fa9a436e5c 100644 --- a/drivers/iio/accel/mma8452.c +++ b/drivers/iio/accel/mma8452.c @@ -111,8 +111,14 @@ static const int mma8452_samp_freq[8][2] = { {6, 250000}, {1, 560000} }; +/* + * Hardware has fullscale of -2G, -4G, -8G corresponding to raw value -2048 + * The userspace interface uses m/s^2 and we declare micro units + * So scale factor is given by: + * g * N * 1000000 / 2048 for N = 2, 4, 8 and g=9.80665 + */ static const int mma8452_scales[3][2] = { - {0, 977}, {0, 1953}, {0, 3906} + {0, 9577}, {0, 19154}, {0, 38307} }; static ssize_t mma8452_show_samp_freq_avail(struct device *dev, From 0b462c89e31f7eb6789713437eb551833ee16ff3 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Sat, 5 Jul 2014 18:43:21 -0400 Subject: [PATCH 144/268] blkcg: don't call into policy draining if root_blkg is already gone While a queue is being destroyed, all the blkgs are destroyed and its ->root_blkg pointer is set to NULL. If someone else starts to drain while the queue is in this state, the following oops happens. NULL pointer dereference at 0000000000000028 IP: [] blk_throtl_drain+0x84/0x230 PGD e4a1067 PUD b773067 PMD 0 Oops: 0000 [#1] PREEMPT SMP DEBUG_PAGEALLOC Modules linked in: cfq_iosched(-) [last unloaded: cfq_iosched] CPU: 1 PID: 537 Comm: bash Not tainted 3.16.0-rc3-work+ #2 Hardware name: Bochs Bochs, BIOS Bochs 01/01/2011 task: ffff88000e222250 ti: ffff88000efd4000 task.ti: ffff88000efd4000 RIP: 0010:[] [] blk_throtl_drain+0x84/0x230 RSP: 0018:ffff88000efd7bf0 EFLAGS: 00010046 RAX: 0000000000000000 RBX: ffff880015091450 RCX: 0000000000000001 RDX: 0000000000000000 RSI: 0000000000000000 RDI: 0000000000000000 RBP: ffff88000efd7c10 R08: 0000000000000000 R09: 0000000000000001 R10: ffff88000e222250 R11: 0000000000000000 R12: ffff880015091450 R13: ffff880015092e00 R14: ffff880015091d70 R15: ffff88001508fc28 FS: 00007f1332650740(0000) GS:ffff88001fa80000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 000000008005003b CR2: 0000000000000028 CR3: 0000000009446000 CR4: 00000000000006e0 Stack: ffffffff8144e8f6 ffff880015091450 0000000000000000 ffff880015091d80 ffff88000efd7c28 ffffffff8144ae2f ffff880015091450 ffff88000efd7c58 ffffffff81427641 ffff880015091450 ffffffff82401f00 ffff880015091450 Call Trace: [] blkcg_drain_queue+0x1f/0x60 [] __blk_drain_queue+0x71/0x180 [] blk_queue_bypass_start+0x6e/0xb0 [] blkcg_deactivate_policy+0x38/0x120 [] blk_throtl_exit+0x34/0x50 [] blkcg_exit_queue+0x35/0x40 [] blk_release_queue+0x26/0xd0 [] kobject_cleanup+0x38/0x70 [] kobject_put+0x28/0x60 [] blk_put_queue+0x15/0x20 [] scsi_device_dev_release_usercontext+0x16b/0x1c0 [] execute_in_process_context+0x89/0xa0 [] scsi_device_dev_release+0x1c/0x20 [] device_release+0x32/0xa0 [] kobject_cleanup+0x38/0x70 [] kobject_put+0x28/0x60 [] put_device+0x17/0x20 [] __scsi_remove_device+0xa9/0xe0 [] scsi_remove_device+0x2b/0x40 [] sdev_store_delete+0x27/0x30 [] dev_attr_store+0x18/0x30 [] sysfs_kf_write+0x3e/0x50 [] kernfs_fop_write+0xe7/0x170 [] vfs_write+0xaf/0x1d0 [] SyS_write+0x4d/0xc0 [] system_call_fastpath+0x16/0x1b 776687bce42b ("block, blk-mq: draining can't be skipped even if bypass_depth was non-zero") made it easier to trigger this bug by making blk_queue_bypass_start() drain even when it loses the first bypass test to blk_cleanup_queue(); however, the bug has always been there even before the commit as blk_queue_bypass_start() could race against queue destruction, win the initial bypass test but perform the actual draining after blk_cleanup_queue() already destroyed all blkgs. Fix it by skippping calling into policy draining if all the blkgs are already gone. Signed-off-by: Tejun Heo Reported-by: Shirish Pargaonkar Reported-by: Sasha Levin Reported-by: Jet Chen Cc: stable@vger.kernel.org Tested-by: Shirish Pargaonkar Signed-off-by: Jens Axboe --- block/blk-cgroup.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c index b9f4cc494ecef..28d227c5ca778 100644 --- a/block/blk-cgroup.c +++ b/block/blk-cgroup.c @@ -872,6 +872,13 @@ void blkcg_drain_queue(struct request_queue *q) { lockdep_assert_held(q->queue_lock); + /* + * @q could be exiting and already have destroyed all blkgs as + * indicated by NULL root_blkg. If so, don't confuse policies. + */ + if (!q->root_blkg) + return; + blk_throtl_drain(q); } From 17089a29a25a3bfe8d14520cd866b7d635ffe5ba Mon Sep 17 00:00:00 2001 From: Weston Andros Adamson Date: Fri, 11 Jul 2014 10:20:45 -0400 Subject: [PATCH 145/268] nfs: mark nfs_page reqs with flag for extra ref Change the use of PG_INODE_REF - set it when taking extra reference on subrequests and take care to only release once for each request. Signed-off-by: Weston Andros Adamson Signed-off-by: Trond Myklebust --- fs/nfs/pagelist.c | 4 +++- fs/nfs/write.c | 8 ++++++-- 2 files changed, 9 insertions(+), 3 deletions(-) diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c index b6ee3a6ee96dd..7368b2130a412 100644 --- a/fs/nfs/pagelist.c +++ b/fs/nfs/pagelist.c @@ -251,8 +251,10 @@ nfs_page_group_init(struct nfs_page *req, struct nfs_page *prev) /* grab extra ref if head request has extra ref from * the write/commit path to handle handoff between write * and commit lists */ - if (test_bit(PG_INODE_REF, &prev->wb_head->wb_flags)) + if (test_bit(PG_INODE_REF, &prev->wb_head->wb_flags)) { + set_bit(PG_INODE_REF, &req->wb_flags); kref_get(&req->wb_kref); + } } } diff --git a/fs/nfs/write.c b/fs/nfs/write.c index 98ff061ccaf36..8e5745a4deff9 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -448,7 +448,9 @@ static void nfs_inode_add_request(struct inode *inode, struct nfs_page *req) set_page_private(req->wb_page, (unsigned long)req); } nfsi->npages++; - set_bit(PG_INODE_REF, &req->wb_flags); + /* this a head request for a page group - mark it as having an + * extra reference so sub groups can follow suit */ + WARN_ON(test_and_set_bit(PG_INODE_REF, &req->wb_flags)); kref_get(&req->wb_kref); spin_unlock(&inode->i_lock); } @@ -474,7 +476,9 @@ static void nfs_inode_remove_request(struct nfs_page *req) nfsi->npages--; spin_unlock(&inode->i_lock); } - nfs_release_request(req); + + if (test_and_clear_bit(PG_INODE_REF, &req->wb_flags)) + nfs_release_request(req); } static void From 85710a837c2026aae80b7c64187edf1f10027b0b Mon Sep 17 00:00:00 2001 From: Weston Andros Adamson Date: Fri, 11 Jul 2014 10:20:46 -0400 Subject: [PATCH 146/268] nfs: nfs_page should take a ref on the head req nfs_pages that aren't the the head of a group must take a reference on the head as long as ->wb_head is set to it. This stops the head from hitting a refcount of 0 while there is still an active nfs_page for the page group. This avoids kref warnings in the writeback code when the page group head is found and referenced. Signed-off-by: Weston Andros Adamson Signed-off-by: Trond Myklebust --- fs/nfs/pagelist.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c index 7368b2130a412..05a63593a61fb 100644 --- a/fs/nfs/pagelist.c +++ b/fs/nfs/pagelist.c @@ -239,15 +239,21 @@ nfs_page_group_init(struct nfs_page *req, struct nfs_page *prev) WARN_ON_ONCE(prev == req); if (!prev) { + /* a head request */ req->wb_head = req; req->wb_this_page = req; } else { + /* a subrequest */ WARN_ON_ONCE(prev->wb_this_page != prev->wb_head); WARN_ON_ONCE(!test_bit(PG_HEADLOCK, &prev->wb_head->wb_flags)); req->wb_head = prev->wb_head; req->wb_this_page = prev->wb_this_page; prev->wb_this_page = req; + /* All subrequests take a ref on the head request until + * nfs_page_group_destroy is called */ + kref_get(&req->wb_head->wb_kref); + /* grab extra ref if head request has extra ref from * the write/commit path to handle handoff between write * and commit lists */ @@ -271,6 +277,10 @@ nfs_page_group_destroy(struct kref *kref) struct nfs_page *req = container_of(kref, struct nfs_page, wb_kref); struct nfs_page *tmp, *next; + /* subrequests must release the ref on the head request */ + if (req->wb_head != req) + nfs_release_request(req->wb_head); + if (!nfs_page_group_sync_on_bit(req, PG_TEARDOWN)) return; From 84d3a9a913ba6a90c79b7763d063bb42554a8906 Mon Sep 17 00:00:00 2001 From: Weston Andros Adamson Date: Fri, 11 Jul 2014 10:20:47 -0400 Subject: [PATCH 147/268] nfs: change find_request to find_head_request nfs_page_find_request_locked* should find the head request for that page. Rename the functions and add comments to make this clear, and fix a bug that could return a subrequest when page_private isn't set on the page. Signed-off-by: Weston Andros Adamson Signed-off-by: Trond Myklebust --- fs/nfs/write.c | 33 ++++++++++++++++++++++++--------- 1 file changed, 24 insertions(+), 9 deletions(-) diff --git a/fs/nfs/write.c b/fs/nfs/write.c index 8e5745a4deff9..53c4a9917dacf 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -91,8 +91,15 @@ static void nfs_context_set_write_error(struct nfs_open_context *ctx, int error) set_bit(NFS_CONTEXT_ERROR_WRITE, &ctx->flags); } +/* + * nfs_page_find_head_request_locked - find head request associated with @page + * + * must be called while holding the inode lock. + * + * returns matching head request with reference held, or NULL if not found. + */ static struct nfs_page * -nfs_page_find_request_locked(struct nfs_inode *nfsi, struct page *page) +nfs_page_find_head_request_locked(struct nfs_inode *nfsi, struct page *page) { struct nfs_page *req = NULL; @@ -104,25 +111,33 @@ nfs_page_find_request_locked(struct nfs_inode *nfsi, struct page *page) /* Linearly search the commit list for the correct req */ list_for_each_entry_safe(freq, t, &nfsi->commit_info.list, wb_list) { if (freq->wb_page == page) { - req = freq; + req = freq->wb_head; break; } } } - if (req) + if (req) { + WARN_ON_ONCE(req->wb_head != req); + kref_get(&req->wb_kref); + } return req; } -static struct nfs_page *nfs_page_find_request(struct page *page) +/* + * nfs_page_find_head_request - find head request associated with @page + * + * returns matching head request with reference held, or NULL if not found. + */ +static struct nfs_page *nfs_page_find_head_request(struct page *page) { struct inode *inode = page_file_mapping(page)->host; struct nfs_page *req = NULL; spin_lock(&inode->i_lock); - req = nfs_page_find_request_locked(NFS_I(inode), page); + req = nfs_page_find_head_request_locked(NFS_I(inode), page); spin_unlock(&inode->i_lock); return req; } @@ -282,7 +297,7 @@ static struct nfs_page *nfs_find_and_lock_request(struct page *page, bool nonblo spin_lock(&inode->i_lock); for (;;) { - req = nfs_page_find_request_locked(NFS_I(inode), page); + req = nfs_page_find_head_request_locked(NFS_I(inode), page); if (req == NULL) break; if (nfs_lock_request(req)) @@ -773,7 +788,7 @@ static struct nfs_page *nfs_try_to_update_request(struct inode *inode, spin_lock(&inode->i_lock); for (;;) { - req = nfs_page_find_request_locked(NFS_I(inode), page); + req = nfs_page_find_head_request_locked(NFS_I(inode), page); if (req == NULL) goto out_unlock; @@ -881,7 +896,7 @@ int nfs_flush_incompatible(struct file *file, struct page *page) * dropped page. */ do { - req = nfs_page_find_request(page); + req = nfs_page_find_head_request(page); if (req == NULL) return 0; l_ctx = req->wb_lock_context; @@ -1575,7 +1590,7 @@ int nfs_wb_page_cancel(struct inode *inode, struct page *page) for (;;) { wait_on_page_writeback(page); - req = nfs_page_find_request(page); + req = nfs_page_find_head_request(page); if (req == NULL) break; if (nfs_lock_request(req)) { From d458138353726ea6dcbc53ae3597e489d0432c25 Mon Sep 17 00:00:00 2001 From: Weston Andros Adamson Date: Fri, 11 Jul 2014 10:20:48 -0400 Subject: [PATCH 148/268] nfs: handle multiple reqs in nfs_page_async_flush Change nfs_find_and_lock_request so nfs_page_async_flush can handle multiple requests in a page. There is only one request for a page the first time nfs_page_async_flush is called, but if a write or commit fails, async_flush is called again and there may be multiple requests associated with the page. The solution is to merge all the requests in a page group into a single request before calling nfs_pageio_add_request. Rename nfs_find_and_lock_request to nfs_lock_and_join_requests and change it to first lock all requests for the page, then cancel and merge all subrequests into the head request. Signed-off-by: Weston Andros Adamson Signed-off-by: Trond Myklebust --- fs/nfs/internal.h | 1 + fs/nfs/pagelist.c | 4 +- fs/nfs/write.c | 255 ++++++++++++++++++++++++++++++++++++++++++---- 3 files changed, 235 insertions(+), 25 deletions(-) diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h index 82ddbf46660e3..f415cbf9f6c3f 100644 --- a/fs/nfs/internal.h +++ b/fs/nfs/internal.h @@ -244,6 +244,7 @@ void nfs_pgio_data_release(struct nfs_pgio_data *); int nfs_generic_pgio(struct nfs_pageio_descriptor *, struct nfs_pgio_header *); int nfs_initiate_pgio(struct rpc_clnt *, struct nfs_pgio_data *, const struct rpc_call_ops *, int, int); +void nfs_free_request(struct nfs_page *req); static inline void nfs_iocounter_init(struct nfs_io_counter *c) { diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c index 05a63593a61fb..0aefc8102b6b4 100644 --- a/fs/nfs/pagelist.c +++ b/fs/nfs/pagelist.c @@ -29,8 +29,6 @@ static struct kmem_cache *nfs_page_cachep; static const struct rpc_call_ops nfs_pgio_common_ops; -static void nfs_free_request(struct nfs_page *); - static bool nfs_pgarray_set(struct nfs_page_array *p, unsigned int pagecount) { p->npages = pagecount; @@ -406,7 +404,7 @@ static void nfs_clear_request(struct nfs_page *req) * * Note: Should never be called with the spinlock held! */ -static void nfs_free_request(struct nfs_page *req) +void nfs_free_request(struct nfs_page *req) { WARN_ON_ONCE(req->wb_this_page != req); diff --git a/fs/nfs/write.c b/fs/nfs/write.c index 53c4a9917dacf..9f4424c464a05 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -46,6 +46,7 @@ static const struct rpc_call_ops nfs_commit_ops; static const struct nfs_pgio_completion_ops nfs_async_write_completion_ops; static const struct nfs_commit_completion_ops nfs_commit_completion_ops; static const struct nfs_rw_ops nfs_rw_write_ops; +static void nfs_clear_request_commit(struct nfs_page *req); static struct kmem_cache *nfs_wdata_cachep; static mempool_t *nfs_wdata_mempool; @@ -289,36 +290,246 @@ static void nfs_end_page_writeback(struct nfs_page *req) clear_bdi_congested(&nfss->backing_dev_info, BLK_RW_ASYNC); } -static struct nfs_page *nfs_find_and_lock_request(struct page *page, bool nonblock) + +/* nfs_page_group_clear_bits + * @req - an nfs request + * clears all page group related bits from @req + */ +static void +nfs_page_group_clear_bits(struct nfs_page *req) +{ + clear_bit(PG_TEARDOWN, &req->wb_flags); + clear_bit(PG_UNLOCKPAGE, &req->wb_flags); + clear_bit(PG_UPTODATE, &req->wb_flags); + clear_bit(PG_WB_END, &req->wb_flags); + clear_bit(PG_REMOVE, &req->wb_flags); +} + + +/* + * nfs_unroll_locks_and_wait - unlock all newly locked reqs and wait on @req + * + * this is a helper function for nfs_lock_and_join_requests + * + * @inode - inode associated with request page group, must be holding inode lock + * @head - head request of page group, must be holding head lock + * @req - request that couldn't lock and needs to wait on the req bit lock + * @nonblock - if true, don't actually wait + * + * NOTE: this must be called holding page_group bit lock and inode spin lock + * and BOTH will be released before returning. + * + * returns 0 on success, < 0 on error. + */ +static int +nfs_unroll_locks_and_wait(struct inode *inode, struct nfs_page *head, + struct nfs_page *req, bool nonblock) + __releases(&inode->i_lock) +{ + struct nfs_page *tmp; + int ret; + + /* relinquish all the locks successfully grabbed this run */ + for (tmp = head ; tmp != req; tmp = tmp->wb_this_page) + nfs_unlock_request(tmp); + + WARN_ON_ONCE(test_bit(PG_TEARDOWN, &req->wb_flags)); + + /* grab a ref on the request that will be waited on */ + kref_get(&req->wb_kref); + + nfs_page_group_unlock(head); + spin_unlock(&inode->i_lock); + + /* release ref from nfs_page_find_head_request_locked */ + nfs_release_request(head); + + if (!nonblock) + ret = nfs_wait_on_request(req); + else + ret = -EAGAIN; + nfs_release_request(req); + + return ret; +} + +/* + * nfs_destroy_unlinked_subrequests - destroy recently unlinked subrequests + * + * @destroy_list - request list (using wb_this_page) terminated by @old_head + * @old_head - the old head of the list + * + * All subrequests must be locked and removed from all lists, so at this point + * they are only "active" in this function, and possibly in nfs_wait_on_request + * with a reference held by some other context. + */ +static void +nfs_destroy_unlinked_subrequests(struct nfs_page *destroy_list, + struct nfs_page *old_head) +{ + while (destroy_list) { + struct nfs_page *subreq = destroy_list; + + destroy_list = (subreq->wb_this_page == old_head) ? + NULL : subreq->wb_this_page; + + WARN_ON_ONCE(old_head != subreq->wb_head); + + /* make sure old group is not used */ + subreq->wb_head = subreq; + subreq->wb_this_page = subreq; + + nfs_clear_request_commit(subreq); + + /* subreq is now totally disconnected from page group or any + * write / commit lists. last chance to wake any waiters */ + nfs_unlock_request(subreq); + + if (!test_bit(PG_TEARDOWN, &subreq->wb_flags)) { + /* release ref on old head request */ + nfs_release_request(old_head); + + nfs_page_group_clear_bits(subreq); + + /* release the PG_INODE_REF reference */ + if (test_and_clear_bit(PG_INODE_REF, &subreq->wb_flags)) + nfs_release_request(subreq); + else + WARN_ON_ONCE(1); + } else { + WARN_ON_ONCE(test_bit(PG_CLEAN, &subreq->wb_flags)); + /* zombie requests have already released the last + * reference and were waiting on the rest of the + * group to complete. Since it's no longer part of a + * group, simply free the request */ + nfs_page_group_clear_bits(subreq); + nfs_free_request(subreq); + } + } +} + +/* + * nfs_lock_and_join_requests - join all subreqs to the head req and return + * a locked reference, cancelling any pending + * operations for this page. + * + * @page - the page used to lookup the "page group" of nfs_page structures + * @nonblock - if true, don't block waiting for request locks + * + * This function joins all sub requests to the head request by first + * locking all requests in the group, cancelling any pending operations + * and finally updating the head request to cover the whole range covered by + * the (former) group. All subrequests are removed from any write or commit + * lists, unlinked from the group and destroyed. + * + * Returns a locked, referenced pointer to the head request - which after + * this call is guaranteed to be the only request associated with the page. + * Returns NULL if no requests are found for @page, or a ERR_PTR if an + * error was encountered. + */ +static struct nfs_page * +nfs_lock_and_join_requests(struct page *page, bool nonblock) { struct inode *inode = page_file_mapping(page)->host; - struct nfs_page *req; + struct nfs_page *head, *subreq; + struct nfs_page *destroy_list = NULL; + unsigned int total_bytes; int ret; +try_again: + total_bytes = 0; + + WARN_ON_ONCE(destroy_list); + spin_lock(&inode->i_lock); - for (;;) { - req = nfs_page_find_head_request_locked(NFS_I(inode), page); - if (req == NULL) - break; - if (nfs_lock_request(req)) - break; - /* Note: If we hold the page lock, as is the case in nfs_writepage, - * then the call to nfs_lock_request() will always - * succeed provided that someone hasn't already marked the - * request as dirty (in which case we don't care). - */ + + /* + * A reference is taken only on the head request which acts as a + * reference to the whole page group - the group will not be destroyed + * until the head reference is released. + */ + head = nfs_page_find_head_request_locked(NFS_I(inode), page); + + if (!head) { spin_unlock(&inode->i_lock); - if (!nonblock) - ret = nfs_wait_on_request(req); - else - ret = -EAGAIN; - nfs_release_request(req); - if (ret != 0) + return NULL; + } + + /* lock each request in the page group */ + nfs_page_group_lock(head); + subreq = head; + do { + /* + * Subrequests are always contiguous, non overlapping + * and in order. If not, it's a programming error. + */ + WARN_ON_ONCE(subreq->wb_offset != + (head->wb_offset + total_bytes)); + + /* keep track of how many bytes this group covers */ + total_bytes += subreq->wb_bytes; + + if (!nfs_lock_request(subreq)) { + /* releases page group bit lock and + * inode spin lock and all references */ + ret = nfs_unroll_locks_and_wait(inode, head, + subreq, nonblock); + + if (ret == 0) + goto try_again; + return ERR_PTR(ret); - spin_lock(&inode->i_lock); + } + + subreq = subreq->wb_this_page; + } while (subreq != head); + + /* Now that all requests are locked, make sure they aren't on any list. + * Commit list removal accounting is done after locks are dropped */ + subreq = head; + do { + nfs_list_remove_request(subreq); + subreq = subreq->wb_this_page; + } while (subreq != head); + + /* unlink subrequests from head, destroy them later */ + if (head->wb_this_page != head) { + /* destroy list will be terminated by head */ + destroy_list = head->wb_this_page; + head->wb_this_page = head; + + /* change head request to cover whole range that + * the former page group covered */ + head->wb_bytes = total_bytes; } + + /* + * prepare head request to be added to new pgio descriptor + */ + nfs_page_group_clear_bits(head); + + /* + * some part of the group was still on the inode list - otherwise + * the group wouldn't be involved in async write. + * grab a reference for the head request, iff it needs one. + */ + if (!test_and_set_bit(PG_INODE_REF, &head->wb_flags)) + kref_get(&head->wb_kref); + + nfs_page_group_unlock(head); + + /* drop lock to clear_request_commit the head req and clean up + * requests on destroy list */ spin_unlock(&inode->i_lock); - return req; + + nfs_destroy_unlinked_subrequests(destroy_list, head); + + /* clean up commit list state */ + nfs_clear_request_commit(head); + + /* still holds ref on head from nfs_page_find_head_request_locked + * and still has lock on head from lock loop */ + return head; } /* @@ -331,7 +542,7 @@ static int nfs_page_async_flush(struct nfs_pageio_descriptor *pgio, struct nfs_page *req; int ret = 0; - req = nfs_find_and_lock_request(page, nonblock); + req = nfs_lock_and_join_requests(page, nonblock); if (!req) goto out; ret = PTR_ERR(req); From 3e2170451e91327bfa8a82040fea78043847533a Mon Sep 17 00:00:00 2001 From: Weston Andros Adamson Date: Fri, 11 Jul 2014 10:20:49 -0400 Subject: [PATCH 149/268] nfs: handle multiple reqs in nfs_wb_page_cancel Use nfs_lock_and_join_requests to merge all subrequests into the head request - this cancels and dereferences all subrequests. Signed-off-by: Weston Andros Adamson Signed-off-by: Trond Myklebust --- fs/nfs/write.c | 41 +++++++++++++++++++++-------------------- 1 file changed, 21 insertions(+), 20 deletions(-) diff --git a/fs/nfs/write.c b/fs/nfs/write.c index 9f4424c464a05..bdc4db23951eb 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -1799,27 +1799,28 @@ int nfs_wb_page_cancel(struct inode *inode, struct page *page) struct nfs_page *req; int ret = 0; - for (;;) { - wait_on_page_writeback(page); - req = nfs_page_find_head_request(page); - if (req == NULL) - break; - if (nfs_lock_request(req)) { - nfs_clear_request_commit(req); - nfs_inode_remove_request(req); - /* - * In case nfs_inode_remove_request has marked the - * page as being dirty - */ - cancel_dirty_page(page, PAGE_CACHE_SIZE); - nfs_unlock_and_release_request(req); - break; - } - ret = nfs_wait_on_request(req); - nfs_release_request(req); - if (ret < 0) - break; + wait_on_page_writeback(page); + + /* blocking call to cancel all requests and join to a single (head) + * request */ + req = nfs_lock_and_join_requests(page, false); + + if (IS_ERR(req)) { + ret = PTR_ERR(req); + } else if (req) { + /* all requests from this page have been cancelled by + * nfs_lock_and_join_requests, so just remove the head + * request from the inode / page_private pointer and + * release it */ + nfs_inode_remove_request(req); + /* + * In case nfs_inode_remove_request has marked the + * page as being dirty + */ + cancel_dirty_page(page, PAGE_CACHE_SIZE); + nfs_unlock_and_release_request(req); } + return ret; } From aafe37504c70954fc104c88d9d15d553572dae69 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sat, 12 Jul 2014 17:23:39 -0400 Subject: [PATCH 150/268] NFS: Remove 2 unused variables Cc: Weston Andros Adamson Signed-off-by: Trond Myklebust --- fs/nfs/direct.c | 2 -- fs/nfs/write.c | 2 -- 2 files changed, 4 deletions(-) diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c index 8f98138cbc438..f11b9eed0de10 100644 --- a/fs/nfs/direct.c +++ b/fs/nfs/direct.c @@ -756,7 +756,6 @@ static void nfs_direct_write_completion(struct nfs_pgio_header *hdr) spin_unlock(&dreq->lock); while (!list_empty(&hdr->pages)) { - bool do_destroy = true; req = nfs_list_entry(hdr->pages.next); nfs_list_remove_request(req); @@ -765,7 +764,6 @@ static void nfs_direct_write_completion(struct nfs_pgio_header *hdr) case NFS_IOHDR_NEED_COMMIT: kref_get(&req->wb_kref); nfs_mark_request_commit(req, hdr->lseg, &cinfo); - do_destroy = false; } nfs_unlock_and_release_request(req); } diff --git a/fs/nfs/write.c b/fs/nfs/write.c index bdc4db23951eb..5e2f10304548e 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -868,7 +868,6 @@ static void nfs_write_completion(struct nfs_pgio_header *hdr) { struct nfs_commit_info cinfo; unsigned long bytes = 0; - bool do_destroy; if (test_bit(NFS_IOHDR_REDO, &hdr->flags)) goto out; @@ -898,7 +897,6 @@ static void nfs_write_completion(struct nfs_pgio_header *hdr) next: nfs_unlock_request(req); nfs_end_page_writeback(req); - do_destroy = !test_bit(NFS_IOHDR_NEED_COMMIT, &hdr->flags); nfs_release_request(req); } out: From 46c1376db1b85ae412a7917cec148c6e60f79428 Mon Sep 17 00:00:00 2001 From: Steve Wise Date: Fri, 20 Jun 2014 14:26:25 -0500 Subject: [PATCH 151/268] RDMA/cxgb4: Call iwpm_init() only once We need to only register with the iwpm core once. Currently it is being done for every adapter, which causes a failure for each adapter but the first, making multiple adapters unusable. Fixes: 9eccfe109b27 ("RDMA/cxgb4: Add support for iWARP Port Mapper user space service") Signed-off-by: Steve Wise Signed-off-by: Roland Dreier --- drivers/infiniband/hw/cxgb4/cm.c | 2 +- drivers/infiniband/hw/cxgb4/device.c | 17 ++++++++++------- drivers/infiniband/hw/cxgb4/iw_cxgb4.h | 2 +- 3 files changed, 12 insertions(+), 9 deletions(-) diff --git a/drivers/infiniband/hw/cxgb4/cm.c b/drivers/infiniband/hw/cxgb4/cm.c index 6a93280250d16..768a0fb67dd6d 100644 --- a/drivers/infiniband/hw/cxgb4/cm.c +++ b/drivers/infiniband/hw/cxgb4/cm.c @@ -3925,7 +3925,7 @@ int __init c4iw_cm_init(void) return 0; } -void __exit c4iw_cm_term(void) +void c4iw_cm_term(void) { WARN_ON(!list_empty(&timeout_list)); flush_workqueue(workq); diff --git a/drivers/infiniband/hw/cxgb4/device.c b/drivers/infiniband/hw/cxgb4/device.c index 16b75de4b2def..7db82b24302b6 100644 --- a/drivers/infiniband/hw/cxgb4/device.c +++ b/drivers/infiniband/hw/cxgb4/device.c @@ -730,7 +730,6 @@ static void c4iw_dealloc(struct uld_ctx *ctx) if (ctx->dev->rdev.oc_mw_kva) iounmap(ctx->dev->rdev.oc_mw_kva); ib_dealloc_device(&ctx->dev->ibdev); - iwpm_exit(RDMA_NL_C4IW); ctx->dev = NULL; } @@ -827,12 +826,6 @@ static struct c4iw_dev *c4iw_alloc(const struct cxgb4_lld_info *infop) setup_debugfs(devp); } - ret = iwpm_init(RDMA_NL_C4IW); - if (ret) { - pr_err("port mapper initialization failed with %d\n", ret); - ib_dealloc_device(&devp->ibdev); - return ERR_PTR(ret); - } return devp; } @@ -1333,6 +1326,15 @@ static int __init c4iw_init_module(void) pr_err("%s[%u]: Failed to add netlink callback\n" , __func__, __LINE__); + err = iwpm_init(RDMA_NL_C4IW); + if (err) { + pr_err("port mapper initialization failed with %d\n", err); + ibnl_remove_client(RDMA_NL_C4IW); + c4iw_cm_term(); + debugfs_remove_recursive(c4iw_debugfs_root); + return err; + } + cxgb4_register_uld(CXGB4_ULD_RDMA, &c4iw_uld_info); return 0; @@ -1350,6 +1352,7 @@ static void __exit c4iw_exit_module(void) } mutex_unlock(&dev_mutex); cxgb4_unregister_uld(CXGB4_ULD_RDMA); + iwpm_exit(RDMA_NL_C4IW); ibnl_remove_client(RDMA_NL_C4IW); c4iw_cm_term(); debugfs_remove_recursive(c4iw_debugfs_root); diff --git a/drivers/infiniband/hw/cxgb4/iw_cxgb4.h b/drivers/infiniband/hw/cxgb4/iw_cxgb4.h index 125bc5d1e175b..361fff7a07427 100644 --- a/drivers/infiniband/hw/cxgb4/iw_cxgb4.h +++ b/drivers/infiniband/hw/cxgb4/iw_cxgb4.h @@ -908,7 +908,7 @@ int c4iw_destroy_ctrl_qp(struct c4iw_rdev *rdev); int c4iw_register_device(struct c4iw_dev *dev); void c4iw_unregister_device(struct c4iw_dev *dev); int __init c4iw_cm_init(void); -void __exit c4iw_cm_term(void); +void c4iw_cm_term(void); void c4iw_release_dev_ucontext(struct c4iw_rdev *rdev, struct c4iw_dev_ucontext *uctx); void c4iw_init_dev_ucontext(struct c4iw_rdev *rdev, From 655fc39bf40331e13503bed85c9ed0278bc35575 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Wed, 9 Jul 2014 21:04:00 +0200 Subject: [PATCH 152/268] firewire: IEEE 1394 (FireWire) support should depend on HAS_DMA Commit b3d681a4fc108f9653bbb44e4f4e72db2b8a5734 ("firewire: Use COMPILE_TEST for build testing") added COMPILE_TEST as an alternative dependency for the purpose of build testing the firewire core. However, this bypasses all other implicit dependencies assumed by PCI, like HAS_DMA. If NO_DMA=y: drivers/built-in.o: In function `fw_iso_buffer_destroy': (.text+0x36a096): undefined reference to `dma_unmap_page' drivers/built-in.o: In function `fw_iso_buffer_map_dma': (.text+0x36a164): undefined reference to `dma_map_page' drivers/built-in.o: In function `fw_iso_buffer_map_dma': (.text+0x36a172): undefined reference to `dma_mapping_error' drivers/built-in.o: In function `sbp2_send_management_orb': sbp2.c:(.text+0x36c6b4): undefined reference to `dma_map_single' sbp2.c:(.text+0x36c6c8): undefined reference to `dma_mapping_error' sbp2.c:(.text+0x36c772): undefined reference to `dma_map_single' sbp2.c:(.text+0x36c786): undefined reference to `dma_mapping_error' sbp2.c:(.text+0x36c854): undefined reference to `dma_unmap_single' sbp2.c:(.text+0x36c872): undefined reference to `dma_unmap_single' drivers/built-in.o: In function `sbp2_map_scatterlist': sbp2.c:(.text+0x36ccbc): undefined reference to `scsi_dma_map' sbp2.c:(.text+0x36cd36): undefined reference to `dma_map_single' sbp2.c:(.text+0x36cd4e): undefined reference to `dma_mapping_error' sbp2.c:(.text+0x36cd84): undefined reference to `scsi_dma_unmap' drivers/built-in.o: In function `sbp2_unmap_scatterlist': sbp2.c:(.text+0x36cda6): undefined reference to `scsi_dma_unmap' sbp2.c:(.text+0x36cdc6): undefined reference to `dma_unmap_single' drivers/built-in.o: In function `complete_command_orb': sbp2.c:(.text+0x36d6ac): undefined reference to `dma_unmap_single' drivers/built-in.o: In function `sbp2_scsi_queuecommand': sbp2.c:(.text+0x36d8e0): undefined reference to `dma_map_single' sbp2.c:(.text+0x36d8f6): undefined reference to `dma_mapping_error' Add an explicit dependency on HAS_DMA to fix this. Signed-off-by: Geert Uytterhoeven Reviewed-by: Jean Delvare Signed-off-by: Stefan Richter --- drivers/firewire/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/firewire/Kconfig b/drivers/firewire/Kconfig index 4199849e37585..145974f9662b6 100644 --- a/drivers/firewire/Kconfig +++ b/drivers/firewire/Kconfig @@ -1,4 +1,5 @@ menu "IEEE 1394 (FireWire) support" + depends on HAS_DMA depends on PCI || COMPILE_TEST # firewire-core does not depend on PCI but is # not useful without PCI controller driver From f563b89b182594f827b4100bd34f916339785a77 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sun, 13 Jul 2014 15:13:19 -0400 Subject: [PATCH 153/268] NFS: Don't reset pg_moreio in __nfs_pageio_add_request Once we've started sending unstable NFS writes, we do not want to clear pg_moreio, or we may end up sending the very last request as a stable write if the commit lists are still empty. Do, however, reset pg_moreio in the case where we end up having to recoalesce the write if an attempt to use pNFS failed. Signed-off-by: Trond Myklebust --- fs/nfs/pagelist.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c index 0aefc8102b6b4..17fab89f63589 100644 --- a/fs/nfs/pagelist.c +++ b/fs/nfs/pagelist.c @@ -935,7 +935,6 @@ static int __nfs_pageio_add_request(struct nfs_pageio_descriptor *desc, nfs_pageio_doio(desc); if (desc->pg_error < 0) return 0; - desc->pg_moreio = 0; if (desc->pg_recoalesce) return 0; /* retry add_request for this subreq */ @@ -982,6 +981,7 @@ static int nfs_do_recoalesce(struct nfs_pageio_descriptor *desc) desc->pg_count = 0; desc->pg_base = 0; desc->pg_recoalesce = 0; + desc->pg_moreio = 0; while (!list_empty(&head)) { struct nfs_page *req; From 54c39e9ba3a93e3848ad8f9d082c39010cfc5e73 Mon Sep 17 00:00:00 2001 From: Thomas Petazzoni Date: Wed, 2 Jul 2014 15:16:32 +0200 Subject: [PATCH 154/268] mtd: nand: reduce the warning noise when the ECC is too weak In commit 67a9ad9b8a6f ("mtd: nand: Warn the user if the selected ECC strength is too weak"), a check was added to inform the user when the ECC used for a NAND device is weaker than the recommended ECC advertised by the NAND chip. However, the warning uses WARN_ON(), which has two undesirable side-effects: - It just prints to the kernel log the fact that there is a warning in this file, at this line, but it doesn't explain anything about the warning itself. - It dumps a stack trace which is very noisy, for something that the user is most likely not able to fix. If a certain ECC used by the kernel is weaker than the advertised one, it's most likely to make sure the kernel uses an ECC that is compatible with the one used by the bootloader, and changing the bootloader may not necessarily be easy. Therefore, normal users would not be able to do anything to fix this very noisy warning, and will have to suffer from it at every kernel boot. At least every time I see this stack trace in my kernel boot log, I wonder what new thing is broken, just to realize that it's once again this NAND ECC warning. Therefore, this commit turns: ------------[ cut here ]------------ WARNING: CPU: 0 PID: 1 at /home/thomas/projets/linux-2.6/drivers/mtd/nand/nand_base.c:4051 nand_scan_tail+0x538/0x780() Modules linked in: CPU: 0 PID: 1 Comm: swapper Not tainted 3.16.0-rc3-dirty #4 [] (unwind_backtrace) from [] (show_stack+0x10/0x14) [] (show_stack) from [] (warn_slowpath_common+0x6c/0x8c) [] (warn_slowpath_common) from [] (warn_slowpath_null+0x1c/0x24) [] (warn_slowpath_null) from [] (nand_scan_tail+0x538/0x780) [] (nand_scan_tail) from [] (orion_nand_probe+0x224/0x2e4) [] (orion_nand_probe) from [] (platform_drv_probe+0x18/0x4c) [] (platform_drv_probe) from [] (really_probe+0x80/0x218) [] (really_probe) from [] (__driver_attach+0x98/0x9c) [] (__driver_attach) from [] (bus_for_each_dev+0x64/0x94) [] (bus_for_each_dev) from [] (bus_add_driver+0x144/0x1ec) [] (bus_add_driver) from [] (driver_register+0x78/0xf8) [] (driver_register) from [] (platform_driver_probe+0x20/0xb8) [] (platform_driver_probe) from [] (do_one_initcall+0x80/0x1d8) [] (do_one_initcall) from [] (kernel_init_freeable+0xf4/0x1b4) [] (kernel_init_freeable) from [] (kernel_init+0x8/0xec) [] (kernel_init) from [] (ret_from_fork+0x14/0x24) ---[ end trace 62f87d875aceccb4 ]--- Into the much shorter, and much more useful: nand: WARNING: MT29F2G08ABAEAWP: the ECC used on your system is too weak compared to the one required by the NAND chip Signed-off-by: Thomas Petazzoni Signed-off-by: Brian Norris --- drivers/mtd/nand/nand_base.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/mtd/nand/nand_base.c b/drivers/mtd/nand/nand_base.c index 41167e9e991e4..4f3e80c68a266 100644 --- a/drivers/mtd/nand/nand_base.c +++ b/drivers/mtd/nand/nand_base.c @@ -4047,8 +4047,10 @@ int nand_scan_tail(struct mtd_info *mtd) ecc->layout->oobavail += ecc->layout->oobfree[i].length; mtd->oobavail = ecc->layout->oobavail; - /* ECC sanity check: warn noisily if it's too weak */ - WARN_ON(!nand_ecc_strength_good(mtd)); + /* ECC sanity check: warn if it's too weak */ + if (!nand_ecc_strength_good(mtd)) + pr_warn("WARNING: %s: the ECC used on your system is too weak compared to the one required by the NAND chip\n", + mtd->name); /* * Set the number of read / write steps for one page depending on ECC From 812c5fa82bae9f377f49000d7636c19a8b61735c Mon Sep 17 00:00:00 2001 From: Andrea Adami Date: Mon, 2 Jun 2014 23:38:35 +0200 Subject: [PATCH 155/268] mtd: cfi_cmdset_0001.c: add support for Sharp LH28F640BF NOR This family of chips was long ago supported by the pre-cfi driver. CFI code tested on several Zaurus SL-5500 (Collie) 2x16 on 32 bit bus. Function is_LH28F640BF() mimics is_m29ew() from cmdset_0002.c Buffer write fixes as seen in 2007 patch c/o Anti Sullin artecdesign.ee> http://comments.gmane.org/gmane.linux.ports.arm.kernel/36733 [Brian: this patch is semi-urgent, because the following patch switches to using CFI detection for a chip which (until now) is unsupported by the CFI driver 9218310 ARM: 8084/1: sa1100: collie: revert back to cfi_probe ] Signed-off-by: Andrea Adami Signed-off-by: Brian Norris --- drivers/mtd/chips/cfi_cmdset_0001.c | 43 +++++++++++++++++++++++++++++ 1 file changed, 43 insertions(+) diff --git a/drivers/mtd/chips/cfi_cmdset_0001.c b/drivers/mtd/chips/cfi_cmdset_0001.c index e4ec355704a6c..a7543ba3e1904 100644 --- a/drivers/mtd/chips/cfi_cmdset_0001.c +++ b/drivers/mtd/chips/cfi_cmdset_0001.c @@ -52,6 +52,11 @@ /* Atmel chips */ #define AT49BV640D 0x02de #define AT49BV640DT 0x02db +/* Sharp chips */ +#define LH28F640BFHE_PTTL90 0x00b0 +#define LH28F640BFHE_PBTL90 0x00b1 +#define LH28F640BFHE_PTTL70A 0x00b2 +#define LH28F640BFHE_PBTL70A 0x00b3 static int cfi_intelext_read (struct mtd_info *, loff_t, size_t, size_t *, u_char *); static int cfi_intelext_write_words(struct mtd_info *, loff_t, size_t, size_t *, const u_char *); @@ -258,6 +263,36 @@ static void fixup_st_m28w320cb(struct mtd_info *mtd) (cfi->cfiq->EraseRegionInfo[1] & 0xffff0000) | 0x3e; }; +static int is_LH28F640BF(struct cfi_private *cfi) +{ + /* Sharp LH28F640BF Family */ + if (cfi->mfr == CFI_MFR_SHARP && ( + cfi->id == LH28F640BFHE_PTTL90 || cfi->id == LH28F640BFHE_PBTL90 || + cfi->id == LH28F640BFHE_PTTL70A || cfi->id == LH28F640BFHE_PBTL70A)) + return 1; + return 0; +} + +static void fixup_LH28F640BF(struct mtd_info *mtd) +{ + struct map_info *map = mtd->priv; + struct cfi_private *cfi = map->fldrv_priv; + struct cfi_pri_intelext *extp = cfi->cmdset_priv; + + /* Reset the Partition Configuration Register on LH28F640BF + * to a single partition (PCR = 0x000): PCR is embedded into A0-A15. */ + if (is_LH28F640BF(cfi)) { + printk(KERN_INFO "Reset Partition Config. Register: 1 Partition of 4 planes\n"); + map_write(map, CMD(0x60), 0); + map_write(map, CMD(0x04), 0); + + /* We have set one single partition thus + * Simultaneous Operations are not allowed */ + printk(KERN_INFO "cfi_cmdset_0001: Simultaneous Operations disabled\n"); + extp->FeatureSupport &= ~512; + } +} + static void fixup_use_point(struct mtd_info *mtd) { struct map_info *map = mtd->priv; @@ -309,6 +344,8 @@ static struct cfi_fixup cfi_fixup_table[] = { { CFI_MFR_ST, 0x00ba, /* M28W320CT */ fixup_st_m28w320ct }, { CFI_MFR_ST, 0x00bb, /* M28W320CB */ fixup_st_m28w320cb }, { CFI_MFR_INTEL, CFI_ID_ANY, fixup_unlock_powerup_lock }, + { CFI_MFR_SHARP, CFI_ID_ANY, fixup_unlock_powerup_lock }, + { CFI_MFR_SHARP, CFI_ID_ANY, fixup_LH28F640BF }, { 0, 0, NULL } }; @@ -1649,6 +1686,12 @@ static int __xipram do_write_buffer(struct map_info *map, struct flchip *chip, initial_adr = adr; cmd_adr = adr & ~(wbufsize-1); + /* Sharp LH28F640BF chips need the first address for the + * Page Buffer Program command. See Table 5 of + * LH28F320BF, LH28F640BF, LH28F128BF Series (Appendix FUM00701) */ + if (is_LH28F640BF(cfi)) + cmd_adr = adr; + /* Let's determine this according to the interleave only once */ write_cmd = (cfi->cfiq->P_ID != P_ID_INTEL_PERFORMANCE) ? CMD(0xe8) : CMD(0xe9); From 5a680fad352525c3461ed1f7e3269d31a9128a07 Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Fri, 11 Jul 2014 16:55:15 -0700 Subject: [PATCH 156/268] net: bcmgenet: fix RGMII_MODE_EN bit RGMII_MODE_EN bit was defined to 0, while it is actually 6. It was not much of a problem on older designs where this was a no-op, and the RGMII data-path would always be enabled, but newer GENET controllers need to explicitely enable their RGMII data-pad using this bit. Signed-off-by: Florian Fainelli Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/genet/bcmgenet.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/broadcom/genet/bcmgenet.h b/drivers/net/ethernet/broadcom/genet/bcmgenet.h index 0f117105fed16..e23c993b13625 100644 --- a/drivers/net/ethernet/broadcom/genet/bcmgenet.h +++ b/drivers/net/ethernet/broadcom/genet/bcmgenet.h @@ -331,9 +331,9 @@ struct bcmgenet_mib_counters { #define EXT_ENERGY_DET_MASK (1 << 12) #define EXT_RGMII_OOB_CTRL 0x0C -#define RGMII_MODE_EN (1 << 0) #define RGMII_LINK (1 << 4) #define OOB_DISABLE (1 << 5) +#define RGMII_MODE_EN (1 << 6) #define ID_MODE_DIS (1 << 16) #define EXT_GPHY_CTRL 0x1C From 845d6fcc2748855183295e2e6583468d0d0a911e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?fran=C3=A7ois=20romieu?= Date: Sat, 12 Jul 2014 02:54:27 +0200 Subject: [PATCH 157/268] MAINTAINERS: update r8169 maintainer Signed-off-by: Francois Romieu Signed-off-by: David S. Miller --- MAINTAINERS | 1 - 1 file changed, 1 deletion(-) diff --git a/MAINTAINERS b/MAINTAINERS index 23863ce069966..afc1c5aa476f7 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -156,7 +156,6 @@ F: drivers/net/hamradio/6pack.c 8169 10/100/1000 GIGABIT ETHERNET DRIVER M: Realtek linux nic maintainers -M: Francois Romieu L: netdev@vger.kernel.org S: Maintained F: drivers/net/ethernet/realtek/r8169.c From 6b89cddee051945a83cc67436ad1680ba7d9f766 Mon Sep 17 00:00:00 2001 From: Daniel Vetter Date: Wed, 9 Jul 2014 22:35:53 +0200 Subject: [PATCH 158/268] Revert "drm/i915: Don't set the 8to6 dither flag when not scaling" This reverts commit 773875bfb6737982903c42d1ee88cf60af80089c. It is very much needed and the lack of dithering has been reported by a large list of people with various gen2/3 hardware. Also, the original patch was complete non-sense since the WARNING backtraces in the references bugzilla are about gmch_pfit.lvds_border_bits mismatch, not at all about the dither bit. That one seems to work. Cc: Jiri Kosina Cc: Pavel Machek Cc: Hans de Bruin Cc: stable@vger.kernel.org Acked-by: Pavel Machek Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/intel_lvds.c | 7 +++++++ drivers/gpu/drm/i915/intel_panel.c | 8 ++++---- 2 files changed, 11 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_lvds.c b/drivers/gpu/drm/i915/intel_lvds.c index 23126023aeba0..5e5a72fca5fbc 100644 --- a/drivers/gpu/drm/i915/intel_lvds.c +++ b/drivers/gpu/drm/i915/intel_lvds.c @@ -111,6 +111,13 @@ static void intel_lvds_get_config(struct intel_encoder *encoder, pipe_config->adjusted_mode.flags |= flags; + /* gen2/3 store dither state in pfit control, needs to match */ + if (INTEL_INFO(dev)->gen < 4) { + tmp = I915_READ(PFIT_CONTROL); + + pipe_config->gmch_pfit.control |= tmp & PANEL_8TO6_DITHER_ENABLE; + } + dotclock = pipe_config->port_clock; if (HAS_PCH_SPLIT(dev_priv->dev)) diff --git a/drivers/gpu/drm/i915/intel_panel.c b/drivers/gpu/drm/i915/intel_panel.c index 628cd8938274c..12b02fe1d0aed 100644 --- a/drivers/gpu/drm/i915/intel_panel.c +++ b/drivers/gpu/drm/i915/intel_panel.c @@ -361,16 +361,16 @@ void intel_gmch_panel_fitting(struct intel_crtc *intel_crtc, pfit_control |= ((intel_crtc->pipe << PFIT_PIPE_SHIFT) | PFIT_FILTER_FUZZY); - /* Make sure pre-965 set dither correctly for 18bpp panels. */ - if (INTEL_INFO(dev)->gen < 4 && pipe_config->pipe_bpp == 18) - pfit_control |= PANEL_8TO6_DITHER_ENABLE; - out: if ((pfit_control & PFIT_ENABLE) == 0) { pfit_control = 0; pfit_pgm_ratios = 0; } + /* Make sure pre-965 set dither correctly for 18bpp panels. */ + if (INTEL_INFO(dev)->gen < 4 && pipe_config->pipe_bpp == 18) + pfit_control |= PANEL_8TO6_DITHER_ENABLE; + pipe_config->gmch_pfit.control = pfit_control; pipe_config->gmch_pfit.pgm_ratios = pfit_pgm_ratios; pipe_config->gmch_pfit.lvds_border_bits = border; From 724cb06fa9b1e1ffd98188275543fdb3b8eaca4f Mon Sep 17 00:00:00 2001 From: Scot Doyle Date: Fri, 11 Jul 2014 22:16:30 +0000 Subject: [PATCH 159/268] drm/i915: Ignore VBT backlight presence check on HP Chromebook 14 commit c675949ec58ca50d5a3ae3c757892f1560f6e896 drm/i915: do not setup backlight if not available according to VBT caused a regression on the HP Chromebook 14 (with Celeron 2955U CPU), which has a misconfigured VBT. Apply quirk to ignore the VBT backlight presence check during backlight setup. Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=79813 Signed-off-by: Scot Doyle Tested-by: Stefan Nagy Cc: Jani Nikula Cc: Daniel Vetter Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/intel_display.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index e27e7804c0b97..82e7d57f0a8ab 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -11673,6 +11673,9 @@ static struct intel_quirk intel_quirks[] = { /* Toshiba CB35 Chromebook (Celeron 2955U) */ { 0x0a06, 0x1179, 0x0a88, quirk_backlight_present }, + + /* HP Chromebook 14 (Celeron 2955U) */ + { 0x0a06, 0x103c, 0x21ed, quirk_backlight_present }, }; static void intel_init_quirks(struct drm_device *dev) From cd50065b3be83a705635550c04e368f2a4cc44d0 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Mon, 14 Jul 2014 10:45:31 +0200 Subject: [PATCH 160/268] ALSA: hda - Revert stream assignment order for Intel controllers We got a regression report for 3.15.x kernels, and this turned out to be triggered by the fix for stream assignment order. On reporter's machine with Intel controller (8086:1e20) + VIA VT1802 codec, the first playback slot can't work with speaker outputs. But the original commit was actually a fix for AMD controllers where no proper GCAP value is returned, we shouldn't revert the whole commit. Instead, in this patch, a new flag is introduced to determine the stream assignment order, and follow the old behavior for Intel controllers. Fixes: dcb32ecd9a53 ('ALSA: hda - Do not assign streams in reverse order') Reported-and-tested-by: Steven Newbury Cc: [v3.15+] Signed-off-by: Takashi Iwai --- sound/pci/hda/hda_controller.c | 3 ++- sound/pci/hda/hda_intel.c | 2 +- sound/pci/hda/hda_priv.h | 1 + 3 files changed, 4 insertions(+), 2 deletions(-) diff --git a/sound/pci/hda/hda_controller.c b/sound/pci/hda/hda_controller.c index 480bbddbd801b..6df04d91c93cd 100644 --- a/sound/pci/hda/hda_controller.c +++ b/sound/pci/hda/hda_controller.c @@ -193,7 +193,8 @@ azx_assign_device(struct azx *chip, struct snd_pcm_substream *substream) dsp_unlock(azx_dev); return azx_dev; } - if (!res) + if (!res || + (chip->driver_caps & AZX_DCAPS_REVERSE_ASSIGN)) res = azx_dev; } dsp_unlock(azx_dev); diff --git a/sound/pci/hda/hda_intel.c b/sound/pci/hda/hda_intel.c index b6b4e71a0b0bd..d690c26a197c8 100644 --- a/sound/pci/hda/hda_intel.c +++ b/sound/pci/hda/hda_intel.c @@ -227,7 +227,7 @@ enum { /* quirks for Intel PCH */ #define AZX_DCAPS_INTEL_PCH_NOPM \ (AZX_DCAPS_SCH_SNOOP | AZX_DCAPS_BUFSIZE | \ - AZX_DCAPS_COUNT_LPIB_DELAY) + AZX_DCAPS_COUNT_LPIB_DELAY | AZX_DCAPS_REVERSE_ASSIGN) #define AZX_DCAPS_INTEL_PCH \ (AZX_DCAPS_INTEL_PCH_NOPM | AZX_DCAPS_PM_RUNTIME) diff --git a/sound/pci/hda/hda_priv.h b/sound/pci/hda/hda_priv.h index 4a7cb01fa9122..e9d1a5762a55b 100644 --- a/sound/pci/hda/hda_priv.h +++ b/sound/pci/hda/hda_priv.h @@ -186,6 +186,7 @@ enum { SDI0, SDI1, SDI2, SDI3, SDO0, SDO1, SDO2, SDO3 }; #define AZX_DCAPS_BUFSIZE (1 << 21) /* no buffer size alignment */ #define AZX_DCAPS_ALIGN_BUFSIZE (1 << 22) /* buffer size alignment */ #define AZX_DCAPS_4K_BDLE_BOUNDARY (1 << 23) /* BDLE in 4k boundary */ +#define AZX_DCAPS_REVERSE_ASSIGN (1 << 24) /* Assign devices in reverse order */ #define AZX_DCAPS_COUNT_LPIB_DELAY (1 << 25) /* Take LPIB as delay */ #define AZX_DCAPS_PM_RUNTIME (1 << 26) /* runtime PM support */ #define AZX_DCAPS_I915_POWERWELL (1 << 27) /* HSW i915 powerwell support */ From 3b3a1814d1703027f9867d0f5cbbfaf6c7482474 Mon Sep 17 00:00:00 2001 From: Mikulas Patocka Date: Wed, 2 Jul 2014 12:46:23 -0400 Subject: [PATCH 161/268] block: provide compat ioctl for BLKZEROOUT This patch provides the compat BLKZEROOUT ioctl. The argument is a pointer to two uint64_t values, so there is no need to translate it. Signed-off-by: Mikulas Patocka Cc: stable@vger.kernel.org # 3.7+ Acked-by: Martin K. Petersen Signed-off-by: Jens Axboe --- block/compat_ioctl.c | 1 + 1 file changed, 1 insertion(+) diff --git a/block/compat_ioctl.c b/block/compat_ioctl.c index fbd5a67cb7738..a0926a6094b28 100644 --- a/block/compat_ioctl.c +++ b/block/compat_ioctl.c @@ -690,6 +690,7 @@ long compat_blkdev_ioctl(struct file *file, unsigned cmd, unsigned long arg) case BLKROSET: case BLKDISCARD: case BLKSECDISCARD: + case BLKZEROOUT: /* * the ones below are implemented in blkdev_locked_ioctl, * but we call blkdev_ioctl, which gets the lock for us From d3cc7996473a7bdd33256029988ea690754e4e2a Mon Sep 17 00:00:00 2001 From: Amit Shah Date: Thu, 10 Jul 2014 15:42:34 +0530 Subject: [PATCH 162/268] hwrng: fetch randomness only after device init Commit d9e7972619334 "hwrng: add randomness to system from rng sources" added a call to rng_get_data() from the hwrng_register() function. However, some rng devices need initialization before data can be read from them. This commit makes the call to rng_get_data() depend on no init fn pointer being registered by the device. If an init function is registered, this call is made after device init. CC: Kees Cook CC: Jason Cooper CC: Herbert Xu CC: # For v3.15+ Signed-off-by: Amit Shah Reviewed-by: Jason Cooper Signed-off-by: Herbert Xu --- drivers/char/hw_random/core.c | 41 ++++++++++++++++++++++++++++------- 1 file changed, 33 insertions(+), 8 deletions(-) diff --git a/drivers/char/hw_random/core.c b/drivers/char/hw_random/core.c index 334601cc81cf5..2a451b14b3cc6 100644 --- a/drivers/char/hw_random/core.c +++ b/drivers/char/hw_random/core.c @@ -55,16 +55,35 @@ static DEFINE_MUTEX(rng_mutex); static int data_avail; static u8 *rng_buffer; +static inline int rng_get_data(struct hwrng *rng, u8 *buffer, size_t size, + int wait); + static size_t rng_buffer_size(void) { return SMP_CACHE_BYTES < 32 ? 32 : SMP_CACHE_BYTES; } +static void add_early_randomness(struct hwrng *rng) +{ + unsigned char bytes[16]; + int bytes_read; + + bytes_read = rng_get_data(rng, bytes, sizeof(bytes), 1); + if (bytes_read > 0) + add_device_randomness(bytes, bytes_read); +} + static inline int hwrng_init(struct hwrng *rng) { - if (!rng->init) - return 0; - return rng->init(rng); + if (rng->init) { + int ret; + + ret = rng->init(rng); + if (ret) + return ret; + } + add_early_randomness(rng); + return 0; } static inline void hwrng_cleanup(struct hwrng *rng) @@ -304,8 +323,6 @@ int hwrng_register(struct hwrng *rng) { int err = -EINVAL; struct hwrng *old_rng, *tmp; - unsigned char bytes[16]; - int bytes_read; if (rng->name == NULL || (rng->data_read == NULL && rng->read == NULL)) @@ -347,9 +364,17 @@ int hwrng_register(struct hwrng *rng) INIT_LIST_HEAD(&rng->list); list_add_tail(&rng->list, &rng_list); - bytes_read = rng_get_data(rng, bytes, sizeof(bytes), 1); - if (bytes_read > 0) - add_device_randomness(bytes, bytes_read); + if (old_rng && !rng->init) { + /* + * Use a new device's input to add some randomness to + * the system. If this rng device isn't going to be + * used right away, its init function hasn't been + * called yet; so only use the randomness from devices + * that don't need an init callback. + */ + add_early_randomness(rng); + } + out_unlock: mutex_unlock(&rng_mutex); out: From e052dbf554610e2104c5a7518c4d8374bed701bb Mon Sep 17 00:00:00 2001 From: Amit Shah Date: Thu, 10 Jul 2014 15:42:35 +0530 Subject: [PATCH 163/268] hwrng: virtio - ensure reads happen after successful probe The hwrng core asks for random data in the hwrng_register() call itself from commit d9e7972619. This doesn't play well with virtio -- the DRIVER_OK bit is only set by virtio core on a successful probe, and we're not yet out of our probe routine when this call is made. This causes the host to not acknowledge any requests we put in the virtqueue, and the insmod or kernel boot process just waits for data to arrive from the host, which never happens. CC: Kees Cook CC: Jason Cooper CC: Herbert Xu CC: # For v3.15+ Reviewed-by: Jason Cooper Signed-off-by: Amit Shah Signed-off-by: Herbert Xu --- drivers/char/hw_random/core.c | 6 ++++++ drivers/char/hw_random/virtio-rng.c | 10 ++++++++++ 2 files changed, 16 insertions(+) diff --git a/drivers/char/hw_random/core.c b/drivers/char/hw_random/core.c index 2a451b14b3cc6..c4419ea1ab078 100644 --- a/drivers/char/hw_random/core.c +++ b/drivers/char/hw_random/core.c @@ -68,6 +68,12 @@ static void add_early_randomness(struct hwrng *rng) unsigned char bytes[16]; int bytes_read; + /* + * Currently only virtio-rng cannot return data during device + * probe, and that's handled in virtio-rng.c itself. If there + * are more such devices, this call to rng_get_data can be + * made conditional here instead of doing it per-device. + */ bytes_read = rng_get_data(rng, bytes, sizeof(bytes), 1); if (bytes_read > 0) add_device_randomness(bytes, bytes_read); diff --git a/drivers/char/hw_random/virtio-rng.c b/drivers/char/hw_random/virtio-rng.c index f3e71501de540..e9b15bc18b4d1 100644 --- a/drivers/char/hw_random/virtio-rng.c +++ b/drivers/char/hw_random/virtio-rng.c @@ -38,6 +38,8 @@ struct virtrng_info { int index; }; +static bool probe_done; + static void random_recv_done(struct virtqueue *vq) { struct virtrng_info *vi = vq->vdev->priv; @@ -67,6 +69,13 @@ static int virtio_read(struct hwrng *rng, void *buf, size_t size, bool wait) int ret; struct virtrng_info *vi = (struct virtrng_info *)rng->priv; + /* + * Don't ask host for data till we're setup. This call can + * happen during hwrng_register(), after commit d9e7972619. + */ + if (unlikely(!probe_done)) + return 0; + if (!vi->busy) { vi->busy = true; init_completion(&vi->have_data); @@ -137,6 +146,7 @@ static int probe_common(struct virtio_device *vdev) return err; } + probe_done = true; return 0; } From 7188b067576db95445bf4e9498f1bdb2e612dd2f Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Mon, 14 Jul 2014 15:41:25 +0200 Subject: [PATCH 164/268] MAINTAINERS: Add Hans de Goede as ahci-platform maintainer Signed-off-by: Hans de Goede Signed-off-by: Tejun Heo --- MAINTAINERS | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/MAINTAINERS b/MAINTAINERS index 134483f206e42..c92bb80fcdbed 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -7993,6 +7993,16 @@ F: drivers/ata/ F: include/linux/ata.h F: include/linux/libata.h +SERIAL ATA AHCI PLATFORM devices support +M: Hans de Goede +M: Tejun Heo +L: linux-ide@vger.kernel.org +T: git git://git.kernel.org/pub/scm/linux/kernel/git/tj/libata.git +S: Supported +F: drivers/ata/ahci_platform.c +F: drivers/ata/libahci_platform.c +F: include/linux/ahci_platform.h + SERVER ENGINES 10Gbps iSCSI - BladeEngine 2 DRIVER M: Jayamohan Kallickal L: linux-scsi@vger.kernel.org From 27f1b36326bc8b6911e7052bc4b80a10234f0ec5 Mon Sep 17 00:00:00 2001 From: Maxim Patlasov Date: Thu, 10 Jul 2014 15:32:43 +0400 Subject: [PATCH 165/268] fuse: release temporary page if fuse_writepage_locked() failed tmp_page to be freed if fuse_write_file_get() returns NULL. Signed-off-by: Maxim Patlasov Signed-off-by: Miklos Szeredi --- fs/fuse/file.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/fs/fuse/file.c b/fs/fuse/file.c index 96d513e01a5d5..35b6f31ecc388 100644 --- a/fs/fuse/file.c +++ b/fs/fuse/file.c @@ -1722,7 +1722,7 @@ static int fuse_writepage_locked(struct page *page) error = -EIO; req->ff = fuse_write_file_get(fc, fi); if (!req->ff) - goto err_free; + goto err_nofile; fuse_write_fill(req, req->ff, page_offset(page), 0); @@ -1750,6 +1750,8 @@ static int fuse_writepage_locked(struct page *page) return 0; +err_nofile: + __free_page(tmp_page); err_free: fuse_request_free(req); err: From f2b3455e47c72bef80fe584adb9bb9bc5afdd99c Mon Sep 17 00:00:00 2001 From: Fabian Frederick Date: Mon, 23 Jun 2014 18:35:15 +0200 Subject: [PATCH 166/268] fuse: replace count*size kzalloc by kcalloc kcalloc manages count*sizeof overflow. Signed-off-by: Fabian Frederick Signed-off-by: Miklos Szeredi --- fs/fuse/file.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/fuse/file.c b/fs/fuse/file.c index 35b6f31ecc388..1570dad1915d6 100644 --- a/fs/fuse/file.c +++ b/fs/fuse/file.c @@ -1992,8 +1992,8 @@ static int fuse_writepages(struct address_space *mapping, data.ff = NULL; err = -ENOMEM; - data.orig_pages = kzalloc(sizeof(struct page *) * - FUSE_MAX_PAGES_PER_REQ, + data.orig_pages = kcalloc(FUSE_MAX_PAGES_PER_REQ, + sizeof(struct page *), GFP_NOFS); if (!data.orig_pages) goto out; From 1871ee134b73fb4cadab75752a7152ed2813c751 Mon Sep 17 00:00:00 2001 From: Kevin Hao Date: Sat, 12 Jul 2014 12:08:24 +0800 Subject: [PATCH 167/268] libata: support the ata host which implements a queue depth less than 32 The sata on fsl mpc8315e is broken after the commit 8a4aeec8d2d6 ("libata/ahci: accommodate tag ordered controllers"). The reason is that the ata controller on this SoC only implement a queue depth of 16. When issuing the commands in tag order, all the commands in tag 16 ~ 31 are mapped to tag 0 unconditionally and then causes the sata malfunction. It makes no senses to use a 32 queue in software while the hardware has less queue depth. So consider the queue depth implemented by the hardware when requesting a command tag. Fixes: 8a4aeec8d2d6 ("libata/ahci: accommodate tag ordered controllers") Cc: stable@vger.kernel.org Signed-off-by: Kevin Hao Acked-by: Dan Williams Signed-off-by: Tejun Heo --- drivers/ata/libata-core.c | 22 +++++++++++++++++++--- 1 file changed, 19 insertions(+), 3 deletions(-) diff --git a/drivers/ata/libata-core.c b/drivers/ata/libata-core.c index 18d97d5c7d90f..d19c37a7abc93 100644 --- a/drivers/ata/libata-core.c +++ b/drivers/ata/libata-core.c @@ -4787,6 +4787,10 @@ void swap_buf_le16(u16 *buf, unsigned int buf_words) * ata_qc_new - Request an available ATA command, for queueing * @ap: target port * + * Some ATA host controllers may implement a queue depth which is less + * than ATA_MAX_QUEUE. So we shouldn't allocate a tag which is beyond + * the hardware limitation. + * * LOCKING: * None. */ @@ -4794,14 +4798,16 @@ void swap_buf_le16(u16 *buf, unsigned int buf_words) static struct ata_queued_cmd *ata_qc_new(struct ata_port *ap) { struct ata_queued_cmd *qc = NULL; - unsigned int i, tag; + unsigned int i, tag, max_queue; + + max_queue = ap->scsi_host->can_queue; /* no command while frozen */ if (unlikely(ap->pflags & ATA_PFLAG_FROZEN)) return NULL; - for (i = 0; i < ATA_MAX_QUEUE; i++) { - tag = (i + ap->last_tag + 1) % ATA_MAX_QUEUE; + for (i = 0, tag = ap->last_tag + 1; i < max_queue; i++, tag++) { + tag = tag < max_queue ? tag : 0; /* the last tag is reserved for internal command. */ if (tag == ATA_TAG_INTERNAL) @@ -6169,6 +6175,16 @@ int ata_host_register(struct ata_host *host, struct scsi_host_template *sht) { int i, rc; + /* + * The max queue supported by hardware must not be greater than + * ATA_MAX_QUEUE. + */ + if (sht->can_queue > ATA_MAX_QUEUE) { + dev_err(host->dev, "BUG: the hardware max queue is too large\n"); + WARN_ON(1); + return -EINVAL; + } + /* host must have been started */ if (!(host->flags & ATA_HOST_STARTED)) { dev_err(host->dev, "BUG: trying to register unstarted host\n"); From 263782c1c95bbddbb022dc092fd89a36bb8d5577 Mon Sep 17 00:00:00 2001 From: Benjamin LaHaise Date: Mon, 14 Jul 2014 12:49:26 -0400 Subject: [PATCH 168/268] aio: protect reqs_available updates from changes in interrupt handlers As of commit f8567a3845ac05bb28f3c1b478ef752762bd39ef it is now possible to have put_reqs_available() called from irq context. While put_reqs_available() is per cpu, it did not protect itself from interrupts on the same CPU. This lead to aio_complete() corrupting the available io requests count when run under a heavy O_DIRECT workloads as reported by Robert Elliott. Fix this by disabling irq updates around the per cpu batch updates of reqs_available. Many thanks to Robert and folks for testing and tracking this down. Reported-by: Robert Elliot Tested-by: Robert Elliot Signed-off-by: Benjamin LaHaise Cc: Jens Axboe , Christoph Hellwig Cc: stable@vger.kenel.org --- fs/aio.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/fs/aio.c b/fs/aio.c index 955947ef3e026..1c9c5f0a9e2be 100644 --- a/fs/aio.c +++ b/fs/aio.c @@ -830,16 +830,20 @@ void exit_aio(struct mm_struct *mm) static void put_reqs_available(struct kioctx *ctx, unsigned nr) { struct kioctx_cpu *kcpu; + unsigned long flags; preempt_disable(); kcpu = this_cpu_ptr(ctx->cpu); + local_irq_save(flags); kcpu->reqs_available += nr; + while (kcpu->reqs_available >= ctx->req_batch * 2) { kcpu->reqs_available -= ctx->req_batch; atomic_add(ctx->req_batch, &ctx->reqs_available); } + local_irq_restore(flags); preempt_enable(); } @@ -847,10 +851,12 @@ static bool get_reqs_available(struct kioctx *ctx) { struct kioctx_cpu *kcpu; bool ret = false; + unsigned long flags; preempt_disable(); kcpu = this_cpu_ptr(ctx->cpu); + local_irq_save(flags); if (!kcpu->reqs_available) { int old, avail = atomic_read(&ctx->reqs_available); @@ -869,6 +875,7 @@ static bool get_reqs_available(struct kioctx *ctx) ret = true; kcpu->reqs_available--; out: + local_irq_restore(flags); preempt_enable(); return ret; } From 9c8958bc24e241de2c0d4740e6dea861fe47daa5 Mon Sep 17 00:00:00 2001 From: Daniel Vetter Date: Mon, 14 Jul 2014 19:35:31 +0200 Subject: [PATCH 169/268] drm/i915: Track the primary plane correctly when reassigning planes MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 98ec77397a5c68ce753dc283aaa6f4742328bcdd Author: Ville Syrjälä Date: Wed Apr 30 17:43:01 2014 +0300 drm/i915: Make primary_enabled match the actual hardware state introduced more accurate tracking of the primary plane and some checks. It missed the plane->pipe reassignement code for gen2/3 though, which the checks caught and resulted in WARNING backtraces. Since we only use this path if the plane is on and on the wrong pipe we can just always set the tracking bit to "enabled". Reported-and-tested-by: Paul Bolle Cc: Paul Bolle Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/intel_display.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index 82e7d57f0a8ab..f0be855ddf45c 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -11914,6 +11914,7 @@ static void intel_sanitize_crtc(struct intel_crtc *crtc) * ... */ plane = crtc->plane; crtc->plane = !plane; + crtc->primary_enabled = true; dev_priv->display.crtc_disable(&crtc->base); crtc->plane = plane; From ee14b644daaa58afe1e91bb9ebd9cf1b18d1f5fa Mon Sep 17 00:00:00 2001 From: Axel Lin Date: Wed, 9 Jul 2014 09:18:59 +0800 Subject: [PATCH 170/268] hwmon: (da9052) Don't use dash in the name attribute Dashes are not allowed in hwmon name attributes. Use "da9052" instead of "da9052-hwmon". Signed-off-by: Axel Lin Cc: stable@vger.kernel.org Signed-off-by: Guenter Roeck --- drivers/hwmon/da9052-hwmon.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/hwmon/da9052-hwmon.c b/drivers/hwmon/da9052-hwmon.c index afd31042b4520..d14ab3c45daa3 100644 --- a/drivers/hwmon/da9052-hwmon.c +++ b/drivers/hwmon/da9052-hwmon.c @@ -194,7 +194,7 @@ static ssize_t da9052_hwmon_show_name(struct device *dev, struct device_attribute *devattr, char *buf) { - return sprintf(buf, "da9052-hwmon\n"); + return sprintf(buf, "da9052\n"); } static ssize_t show_label(struct device *dev, From 6b00f440dd678d786389a7100a2e03fe44478431 Mon Sep 17 00:00:00 2001 From: Axel Lin Date: Wed, 9 Jul 2014 09:22:54 +0800 Subject: [PATCH 171/268] hwmon: (da9055) Don't use dash in the name attribute Dashes are not allowed in hwmon name attributes. Use "da9055" instead of "da9055-hwmon". Signed-off-by: Axel Lin Cc: stable@vger.kernel.org Signed-off-by: Guenter Roeck --- drivers/hwmon/da9055-hwmon.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/hwmon/da9055-hwmon.c b/drivers/hwmon/da9055-hwmon.c index 73b3865f1207a..35eb7738d7119 100644 --- a/drivers/hwmon/da9055-hwmon.c +++ b/drivers/hwmon/da9055-hwmon.c @@ -204,7 +204,7 @@ static ssize_t da9055_hwmon_show_name(struct device *dev, struct device_attribute *devattr, char *buf) { - return sprintf(buf, "da9055-hwmon\n"); + return sprintf(buf, "da9055\n"); } static ssize_t show_label(struct device *dev, From 2843768b701971ab10e62c77d5c75ad7c306f1bd Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Mon, 14 Jul 2014 19:35:45 +0200 Subject: [PATCH 172/268] Revert "ACPI / video: change acpi-video brightness_switch_enabled default to 0" MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This reverts commit 886129a8eebeb (ACPI / video: change acpi-video brightness_switch_enabled default to 0) as it is reported to cause problems to happen. Fixes: 886129a8eebeb (ACPI / video: change acpi-video brightness_switch_enabled default to 0) Link: http://marc.info/?l=linux-acpi&m=140534286826819&w=2 Reported by: Bjørn Mork Signed-off-by: Rafael J. Wysocki --- Documentation/kernel-parameters.txt | 2 +- drivers/acpi/video.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt index c1b9aa8c5a52e..e332e718cad64 100644 --- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt @@ -3526,7 +3526,7 @@ bytes respectively. Such letter suffixes can also be entirely omitted. the allocated input device; If set to 0, video driver will only send out the event without touching backlight brightness level. - default: 0 + default: 1 virtio_mmio.device= [VMMIO] Memory mapped virtio (platform) device. diff --git a/drivers/acpi/video.c b/drivers/acpi/video.c index 071c1dfb93f3b..29649c1948860 100644 --- a/drivers/acpi/video.c +++ b/drivers/acpi/video.c @@ -68,7 +68,7 @@ MODULE_AUTHOR("Bruno Ducrot"); MODULE_DESCRIPTION("ACPI Video Driver"); MODULE_LICENSE("GPL"); -static bool brightness_switch_enabled; +static bool brightness_switch_enabled = 1; module_param(brightness_switch_enabled, bool, 0644); /* From 2448e3493cb3874baa90725c87869455ebf11cd2 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Fri, 11 Jul 2014 21:06:38 +0200 Subject: [PATCH 173/268] tracing: instance_rmdir() leaks ftrace_event_file->filter instance_rmdir() path destroys the event files but forgets to free file->filter. Change remove_event_file_dir() to free_event_filter(). Link: http://lkml.kernel.org/p/20140711190638.GA19517@redhat.com Cc: Masami Hiramatsu Cc: Namhyung Kim Cc: Srikar Dronamraju Cc: Tom Zanussi Cc: "zhangwei(Jovi)" Cc: stable@vger.kernel.org # 3.11+ Fixes: f6a84bdc75b5 "tracing: Introduce remove_event_file_dir()" Signed-off-by: Oleg Nesterov Signed-off-by: Steven Rostedt --- kernel/trace/trace_events.c | 1 + 1 file changed, 1 insertion(+) diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c index f99e0b3bca8cb..2de53628689f5 100644 --- a/kernel/trace/trace_events.c +++ b/kernel/trace/trace_events.c @@ -470,6 +470,7 @@ static void remove_event_file_dir(struct ftrace_event_file *file) list_del(&file->list); remove_subsystem(file->system); + free_event_filter(file->filter); kmem_cache_free(file_cachep, file); } From 8762e5092828c4dc0f49da5a47a644c670df77f3 Mon Sep 17 00:00:00 2001 From: Boris Ostrovsky Date: Wed, 9 Jul 2014 13:18:18 -0400 Subject: [PATCH 174/268] x86/espfix/xen: Fix allocation of pages for paravirt page tables init_espfix_ap() is currently off by one level when informing hypervisor that allocated pages will be used for ministacks' page tables. The most immediate effect of this on a PV guest is that if 'stack_page = __get_free_page()' returns a non-zeroed-out page the hypervisor will refuse to use it for a page table (which it shouldn't be anyway). This will result in warnings by both Xen and Linux. More importantly, a subsequent write to that page (again, by a PV guest) is likely to result in fatal page fault. Signed-off-by: Boris Ostrovsky Link: http://lkml.kernel.org/r/1404926298-5565-1-git-send-email-boris.ostrovsky@oracle.com Reviewed-by: Konrad Rzeszutek Wilk Signed-off-by: H. Peter Anvin Cc: --- arch/x86/kernel/espfix_64.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/arch/x86/kernel/espfix_64.c b/arch/x86/kernel/espfix_64.c index 6afbb16e9b794..94d857fb10339 100644 --- a/arch/x86/kernel/espfix_64.c +++ b/arch/x86/kernel/espfix_64.c @@ -175,7 +175,7 @@ void init_espfix_ap(void) if (!pud_present(pud)) { pmd_p = (pmd_t *)__get_free_page(PGALLOC_GFP); pud = __pud(__pa(pmd_p) | (PGTABLE_PROT & ptemask)); - paravirt_alloc_pud(&init_mm, __pa(pmd_p) >> PAGE_SHIFT); + paravirt_alloc_pmd(&init_mm, __pa(pmd_p) >> PAGE_SHIFT); for (n = 0; n < ESPFIX_PUD_CLONES; n++) set_pud(&pud_p[n], pud); } @@ -185,7 +185,7 @@ void init_espfix_ap(void) if (!pmd_present(pmd)) { pte_p = (pte_t *)__get_free_page(PGALLOC_GFP); pmd = __pmd(__pa(pte_p) | (PGTABLE_PROT & ptemask)); - paravirt_alloc_pmd(&init_mm, __pa(pte_p) >> PAGE_SHIFT); + paravirt_alloc_pte(&init_mm, __pa(pte_p) >> PAGE_SHIFT); for (n = 0; n < ESPFIX_PMD_CLONES; n++) set_pmd(&pmd_p[n], pmd); } @@ -193,7 +193,6 @@ void init_espfix_ap(void) pte_p = pte_offset_kernel(&pmd, addr); stack_page = (void *)__get_free_page(GFP_KERNEL); pte = __pte(__pa(stack_page) | (__PAGE_KERNEL_RO & ptemask)); - paravirt_alloc_pte(&init_mm, __pa(stack_page) >> PAGE_SHIFT); for (n = 0; n < ESPFIX_PTE_CLONES; n++) set_pte(&pte_p[n*PTE_STRIDE], pte); From aa182e64f16fc29a4984c2d79191b161888bbd9b Mon Sep 17 00:00:00 2001 From: Dave Chinner Date: Tue, 15 Jul 2014 07:08:10 +1000 Subject: [PATCH 175/268] Revert "xfs: block allocation work needs to be kswapd aware" This reverts commit 1f6d64829db78a7e1d63e15c9f48f0a5d2b5a679. This commit resulted in regressions in performance in low memory situations where kswapd was doing writeback of delayed allocation blocks. It resulted in significant parallelism of the kswapd work and with the special kswapd flags meant that hundreds of active allocation could dip into kswapd specific memory reserves and avoid being throttled. This cause a large amount of performance variation, as well as random OOM-killer invocations that didn't previously exist. Signed-off-by: Dave Chinner Reviewed-by: Brian Foster Signed-off-by: Dave Chinner --- fs/xfs/xfs_bmap_util.c | 16 +++------------- fs/xfs/xfs_bmap_util.h | 13 ++++++------- 2 files changed, 9 insertions(+), 20 deletions(-) diff --git a/fs/xfs/xfs_bmap_util.c b/fs/xfs/xfs_bmap_util.c index 703b3ec1796cd..057f671811d61 100644 --- a/fs/xfs/xfs_bmap_util.c +++ b/fs/xfs/xfs_bmap_util.c @@ -258,23 +258,14 @@ xfs_bmapi_allocate_worker( struct xfs_bmalloca *args = container_of(work, struct xfs_bmalloca, work); unsigned long pflags; - unsigned long new_pflags = PF_FSTRANS; - /* - * we are in a transaction context here, but may also be doing work - * in kswapd context, and hence we may need to inherit that state - * temporarily to ensure that we don't block waiting for memory reclaim - * in any way. - */ - if (args->kswapd) - new_pflags |= PF_MEMALLOC | PF_SWAPWRITE | PF_KSWAPD; - - current_set_flags_nested(&pflags, new_pflags); + /* we are in a transaction context here */ + current_set_flags_nested(&pflags, PF_FSTRANS); args->result = __xfs_bmapi_allocate(args); complete(args->done); - current_restore_flags_nested(&pflags, new_pflags); + current_restore_flags_nested(&pflags, PF_FSTRANS); } /* @@ -293,7 +284,6 @@ xfs_bmapi_allocate( args->done = &done; - args->kswapd = current_is_kswapd(); INIT_WORK_ONSTACK(&args->work, xfs_bmapi_allocate_worker); queue_work(xfs_alloc_wq, &args->work); wait_for_completion(&done); diff --git a/fs/xfs/xfs_bmap_util.h b/fs/xfs/xfs_bmap_util.h index 075f72232a64a..935ed2b24edfb 100644 --- a/fs/xfs/xfs_bmap_util.h +++ b/fs/xfs/xfs_bmap_util.h @@ -50,13 +50,12 @@ struct xfs_bmalloca { xfs_extlen_t total; /* total blocks needed for xaction */ xfs_extlen_t minlen; /* minimum allocation size (blocks) */ xfs_extlen_t minleft; /* amount must be left after alloc */ - bool eof; /* set if allocating past last extent */ - bool wasdel; /* replacing a delayed allocation */ - bool userdata;/* set if is user data */ - bool aeof; /* allocated space at eof */ - bool conv; /* overwriting unwritten extents */ - bool stack_switch; - bool kswapd; /* allocation in kswapd context */ + char eof; /* set if allocating past last extent */ + char wasdel; /* replacing a delayed allocation */ + char userdata;/* set if is user data */ + char aeof; /* allocated space at eof */ + char conv; /* overwriting unwritten extents */ + char stack_switch; int flags; struct completion *done; struct work_struct work; From cf11da9c5d374962913ca5ba0ce0886b58286224 Mon Sep 17 00:00:00 2001 From: Dave Chinner Date: Tue, 15 Jul 2014 07:08:24 +1000 Subject: [PATCH 176/268] xfs: refine the allocation stack switch The allocation stack switch at xfs_bmapi_allocate() has served it's purpose, but is no longer a sufficient solution to the stack usage problem we have in the XFS allocation path. Whilst the kernel stack size is now 16k, that is not a valid reason for undoing all our "keep stack usage down" modifications. What it does allow us to do is have the freedom to refine and perfect the modifications knowing that if we get it wrong it won't blow up in our faces - we have a safety net now. This is important because we still have the issue of older kernels having smaller stacks and that they are still supported and are demonstrating a wide range of different stack overflows. Red Hat has several open bugs for allocation based stack overflows from directory modifications and direct IO block allocation and these problems still need to be solved. If we can solve them upstream, then distro's won't need to bake their own unique solutions. To that end, I've observed that every allocation based stack overflow report has had a specific characteristic - it has happened during or directly after a bmap btree block split. That event requires a new block to be allocated to the tree, and so we effectively stack one allocation stack on top of another, and that's when we get into trouble. A further observation is that bmap btree block splits are much rarer than writeback allocation - over a range of different workloads I've observed the ratio of bmap btree inserts to splits ranges from 100:1 (xfstests run) to 10000:1 (local VM image server with sparse files that range in the hundreds of thousands to millions of extents). Either way, bmap btree split events are much, much rarer than allocation events. Finally, we have to move the kswapd state to the allocation workqueue work when allocation is done on behalf of kswapd. This is proving to cause significant perturbation in performance under memory pressure and appears to be generating allocation deadlock warnings under some workloads, so avoiding the use of a workqueue for the majority of kswapd writeback allocation will minimise the impact of such behaviour. Hence it makes sense to move the stack switch to xfs_btree_split() and only do it for bmap btree splits. Stack switches during allocation will be much rarer, so there won't be significant performacne overhead caused by switching stacks. The worse case stack from all allocation paths will be split, not just writeback. And the majority of memory allocations will be done in the correct context (e.g. kswapd) without causing additional latency, and so we simplify the memory reclaim interactions between processes, workqueues and kswapd. The worst stack I've been able to generate with this patch in place is 5600 bytes deep. It's very revealing because we exit XFS at: 37) 1768 64 kmem_cache_alloc+0x13b/0x170 about 1800 bytes of stack consumed, and the remaining 3800 bytes (and 36 functions) is memory reclaim, swap and the IO stack. And this occurs in the inode allocation from an open(O_CREAT) syscall, not writeback. The amount of stack being used is much less than I've previously be able to generate - fs_mark testing has been able to generate stack usage of around 7k without too much trouble; with this patch it's only just getting to 5.5k. This is primarily because the metadata allocation paths (e.g. directory blocks) are no longer causing double splits on the same stack, and hence now stack tracing is showing swapping being the worst stack consumer rather than XFS. Performance of fs_mark inode create workloads is unchanged. Performance of fs_mark async fsync workloads is consistently good with context switches reduced by around 150,000/s (30%). Performance of dbench, streaming IO and postmark is unchanged. Allocation deadlock warnings have not been seen on the workloads that generated them since adding this patch. Signed-off-by: Dave Chinner Reviewed-by: Brian Foster Signed-off-by: Dave Chinner --- fs/xfs/xfs_bmap.c | 7 ++-- fs/xfs/xfs_bmap.h | 4 +-- fs/xfs/xfs_bmap_util.c | 43 ---------------------- fs/xfs/xfs_bmap_util.h | 13 +++---- fs/xfs/xfs_btree.c | 82 +++++++++++++++++++++++++++++++++++++++++- fs/xfs/xfs_iomap.c | 3 +- 6 files changed, 90 insertions(+), 62 deletions(-) diff --git a/fs/xfs/xfs_bmap.c b/fs/xfs/xfs_bmap.c index 96175df211b19..75c3fe5f3d9d8 100644 --- a/fs/xfs/xfs_bmap.c +++ b/fs/xfs/xfs_bmap.c @@ -4298,8 +4298,8 @@ xfs_bmapi_delay( } -int -__xfs_bmapi_allocate( +static int +xfs_bmapi_allocate( struct xfs_bmalloca *bma) { struct xfs_mount *mp = bma->ip->i_mount; @@ -4578,9 +4578,6 @@ xfs_bmapi_write( bma.flist = flist; bma.firstblock = firstblock; - if (flags & XFS_BMAPI_STACK_SWITCH) - bma.stack_switch = 1; - while (bno < end && n < *nmap) { inhole = eof || bma.got.br_startoff > bno; wasdelay = !inhole && isnullstartblock(bma.got.br_startblock); diff --git a/fs/xfs/xfs_bmap.h b/fs/xfs/xfs_bmap.h index 38ba36e9b2f0c..b879ca56a64cc 100644 --- a/fs/xfs/xfs_bmap.h +++ b/fs/xfs/xfs_bmap.h @@ -77,7 +77,6 @@ typedef struct xfs_bmap_free * from written to unwritten, otherwise convert from unwritten to written. */ #define XFS_BMAPI_CONVERT 0x040 -#define XFS_BMAPI_STACK_SWITCH 0x080 #define XFS_BMAPI_FLAGS \ { XFS_BMAPI_ENTIRE, "ENTIRE" }, \ @@ -86,8 +85,7 @@ typedef struct xfs_bmap_free { XFS_BMAPI_PREALLOC, "PREALLOC" }, \ { XFS_BMAPI_IGSTATE, "IGSTATE" }, \ { XFS_BMAPI_CONTIG, "CONTIG" }, \ - { XFS_BMAPI_CONVERT, "CONVERT" }, \ - { XFS_BMAPI_STACK_SWITCH, "STACK_SWITCH" } + { XFS_BMAPI_CONVERT, "CONVERT" } static inline int xfs_bmapi_aflag(int w) diff --git a/fs/xfs/xfs_bmap_util.c b/fs/xfs/xfs_bmap_util.c index 057f671811d61..64731ef3324d4 100644 --- a/fs/xfs/xfs_bmap_util.c +++ b/fs/xfs/xfs_bmap_util.c @@ -248,49 +248,6 @@ xfs_bmap_rtalloc( return 0; } -/* - * Stack switching interfaces for allocation - */ -static void -xfs_bmapi_allocate_worker( - struct work_struct *work) -{ - struct xfs_bmalloca *args = container_of(work, - struct xfs_bmalloca, work); - unsigned long pflags; - - /* we are in a transaction context here */ - current_set_flags_nested(&pflags, PF_FSTRANS); - - args->result = __xfs_bmapi_allocate(args); - complete(args->done); - - current_restore_flags_nested(&pflags, PF_FSTRANS); -} - -/* - * Some allocation requests often come in with little stack to work on. Push - * them off to a worker thread so there is lots of stack to use. Otherwise just - * call directly to avoid the context switch overhead here. - */ -int -xfs_bmapi_allocate( - struct xfs_bmalloca *args) -{ - DECLARE_COMPLETION_ONSTACK(done); - - if (!args->stack_switch) - return __xfs_bmapi_allocate(args); - - - args->done = &done; - INIT_WORK_ONSTACK(&args->work, xfs_bmapi_allocate_worker); - queue_work(xfs_alloc_wq, &args->work); - wait_for_completion(&done); - destroy_work_on_stack(&args->work); - return args->result; -} - /* * Check if the endoff is outside the last extent. If so the caller will grow * the allocation to a stripe unit boundary. All offsets are considered outside diff --git a/fs/xfs/xfs_bmap_util.h b/fs/xfs/xfs_bmap_util.h index 935ed2b24edfb..2fdb72d2c908f 100644 --- a/fs/xfs/xfs_bmap_util.h +++ b/fs/xfs/xfs_bmap_util.h @@ -50,12 +50,11 @@ struct xfs_bmalloca { xfs_extlen_t total; /* total blocks needed for xaction */ xfs_extlen_t minlen; /* minimum allocation size (blocks) */ xfs_extlen_t minleft; /* amount must be left after alloc */ - char eof; /* set if allocating past last extent */ - char wasdel; /* replacing a delayed allocation */ - char userdata;/* set if is user data */ - char aeof; /* allocated space at eof */ - char conv; /* overwriting unwritten extents */ - char stack_switch; + bool eof; /* set if allocating past last extent */ + bool wasdel; /* replacing a delayed allocation */ + bool userdata;/* set if is user data */ + bool aeof; /* allocated space at eof */ + bool conv; /* overwriting unwritten extents */ int flags; struct completion *done; struct work_struct work; @@ -65,8 +64,6 @@ struct xfs_bmalloca { int xfs_bmap_finish(struct xfs_trans **tp, struct xfs_bmap_free *flist, int *committed); int xfs_bmap_rtalloc(struct xfs_bmalloca *ap); -int xfs_bmapi_allocate(struct xfs_bmalloca *args); -int __xfs_bmapi_allocate(struct xfs_bmalloca *args); int xfs_bmap_eof(struct xfs_inode *ip, xfs_fileoff_t endoff, int whichfork, int *eof); int xfs_bmap_count_blocks(struct xfs_trans *tp, struct xfs_inode *ip, diff --git a/fs/xfs/xfs_btree.c b/fs/xfs/xfs_btree.c index bf810c6baf2b8..cf893bc1e373a 100644 --- a/fs/xfs/xfs_btree.c +++ b/fs/xfs/xfs_btree.c @@ -33,6 +33,7 @@ #include "xfs_error.h" #include "xfs_trace.h" #include "xfs_cksum.h" +#include "xfs_alloc.h" /* * Cursor allocation zone. @@ -2323,7 +2324,7 @@ xfs_btree_rshift( * record (to be inserted into parent). */ STATIC int /* error */ -xfs_btree_split( +__xfs_btree_split( struct xfs_btree_cur *cur, int level, union xfs_btree_ptr *ptrp, @@ -2503,6 +2504,85 @@ xfs_btree_split( return error; } +struct xfs_btree_split_args { + struct xfs_btree_cur *cur; + int level; + union xfs_btree_ptr *ptrp; + union xfs_btree_key *key; + struct xfs_btree_cur **curp; + int *stat; /* success/failure */ + int result; + bool kswapd; /* allocation in kswapd context */ + struct completion *done; + struct work_struct work; +}; + +/* + * Stack switching interfaces for allocation + */ +static void +xfs_btree_split_worker( + struct work_struct *work) +{ + struct xfs_btree_split_args *args = container_of(work, + struct xfs_btree_split_args, work); + unsigned long pflags; + unsigned long new_pflags = PF_FSTRANS; + + /* + * we are in a transaction context here, but may also be doing work + * in kswapd context, and hence we may need to inherit that state + * temporarily to ensure that we don't block waiting for memory reclaim + * in any way. + */ + if (args->kswapd) + new_pflags |= PF_MEMALLOC | PF_SWAPWRITE | PF_KSWAPD; + + current_set_flags_nested(&pflags, new_pflags); + + args->result = __xfs_btree_split(args->cur, args->level, args->ptrp, + args->key, args->curp, args->stat); + complete(args->done); + + current_restore_flags_nested(&pflags, new_pflags); +} + +/* + * BMBT split requests often come in with little stack to work on. Push + * them off to a worker thread so there is lots of stack to use. For the other + * btree types, just call directly to avoid the context switch overhead here. + */ +STATIC int /* error */ +xfs_btree_split( + struct xfs_btree_cur *cur, + int level, + union xfs_btree_ptr *ptrp, + union xfs_btree_key *key, + struct xfs_btree_cur **curp, + int *stat) /* success/failure */ +{ + struct xfs_btree_split_args args; + DECLARE_COMPLETION_ONSTACK(done); + + if (cur->bc_btnum != XFS_BTNUM_BMAP) + return __xfs_btree_split(cur, level, ptrp, key, curp, stat); + + args.cur = cur; + args.level = level; + args.ptrp = ptrp; + args.key = key; + args.curp = curp; + args.stat = stat; + args.done = &done; + args.kswapd = current_is_kswapd(); + INIT_WORK_ONSTACK(&args.work, xfs_btree_split_worker); + queue_work(xfs_alloc_wq, &args.work); + wait_for_completion(&done); + destroy_work_on_stack(&args.work); + return args.result; +} + + /* * Copy the old inode root contents into a real block and make the * broot point to it. diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c index 6c5eb4c551e3f..6d3ec2b6ee294 100644 --- a/fs/xfs/xfs_iomap.c +++ b/fs/xfs/xfs_iomap.c @@ -749,8 +749,7 @@ xfs_iomap_write_allocate( * pointer that the caller gave to us. */ error = xfs_bmapi_write(tp, ip, map_start_fsb, - count_fsb, - XFS_BMAPI_STACK_SWITCH, + count_fsb, 0, &first_block, 1, imap, &nimaps, &free_list); if (error) From c6930992948adf0f8fc1f6ff1da51c5002a2cf95 Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Mon, 14 Jul 2014 11:04:39 +1000 Subject: [PATCH 177/268] Revert "drm/i915: reverse dp link param selection, prefer fast over wide again" This reverts commit 38aecea0ccbb909d635619cba22f1891e589b434. This breaks Haswell Thinkpad + Lenovo dock in SST mode with a HDMI monitor attached. Before this we can 1920x1200 mode, after this we only ever get 1024x768, and a lot of deferring. This didn't revert clean, but this should be fine. bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=1117008 Cc: stable@vger.kernel.org # v3.15 Signed-off-by: Dave Airlie Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/intel_dp.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_dp.c b/drivers/gpu/drm/i915/intel_dp.c index 075170d1844fa..8a1a4fbc06ac8 100644 --- a/drivers/gpu/drm/i915/intel_dp.c +++ b/drivers/gpu/drm/i915/intel_dp.c @@ -906,8 +906,8 @@ intel_dp_compute_config(struct intel_encoder *encoder, mode_rate = intel_dp_link_required(adjusted_mode->crtc_clock, bpp); - for (lane_count = min_lane_count; lane_count <= max_lane_count; lane_count <<= 1) { - for (clock = min_clock; clock <= max_clock; clock++) { + for (clock = min_clock; clock <= max_clock; clock++) { + for (lane_count = min_lane_count; lane_count <= max_lane_count; lane_count <<= 1) { link_clock = drm_dp_bw_code_to_link_rate(bws[clock]); link_avail = intel_dp_max_data_rate(link_clock, lane_count); From 8f2e5ae40ec193bc0a0ed99e95315c3eebca84ea Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Sat, 12 Jul 2014 20:30:35 +0200 Subject: [PATCH 178/268] net: sctp: fix information leaks in ulpevent layer While working on some other SCTP code, I noticed that some structures shared with user space are leaking uninitialized stack or heap buffer. In particular, struct sctp_sndrcvinfo has a 2 bytes hole between .sinfo_flags and .sinfo_ppid that remains unfilled by us in sctp_ulpevent_read_sndrcvinfo() when putting this into cmsg. But also struct sctp_remote_error contains a 2 bytes hole that we don't fill but place into a skb through skb_copy_expand() via sctp_ulpevent_make_remote_error(). Both structures are defined by the IETF in RFC6458: * Section 5.3.2. SCTP Header Information Structure: The sctp_sndrcvinfo structure is defined below: struct sctp_sndrcvinfo { uint16_t sinfo_stream; uint16_t sinfo_ssn; uint16_t sinfo_flags; <-- 2 bytes hole --> uint32_t sinfo_ppid; uint32_t sinfo_context; uint32_t sinfo_timetolive; uint32_t sinfo_tsn; uint32_t sinfo_cumtsn; sctp_assoc_t sinfo_assoc_id; }; * 6.1.3. SCTP_REMOTE_ERROR: A remote peer may send an Operation Error message to its peer. This message indicates a variety of error conditions on an association. The entire ERROR chunk as it appears on the wire is included in an SCTP_REMOTE_ERROR event. Please refer to the SCTP specification [RFC4960] and any extensions for a list of possible error formats. An SCTP error notification has the following format: struct sctp_remote_error { uint16_t sre_type; uint16_t sre_flags; uint32_t sre_length; uint16_t sre_error; <-- 2 bytes hole --> sctp_assoc_t sre_assoc_id; uint8_t sre_data[]; }; Fix this by setting both to 0 before filling them out. We also have other structures shared between user and kernel space in SCTP that contains holes (e.g. struct sctp_paddrthlds), but we copy that buffer over from user space first and thus don't need to care about it in that cases. While at it, we can also remove lengthy comments copied from the draft, instead, we update the comment with the correct RFC number where one can look it up. Signed-off-by: Daniel Borkmann Signed-off-by: David S. Miller --- net/sctp/ulpevent.c | 122 ++++++-------------------------------------- 1 file changed, 15 insertions(+), 107 deletions(-) diff --git a/net/sctp/ulpevent.c b/net/sctp/ulpevent.c index 85c64658bd0b1..b6842fdb53d4b 100644 --- a/net/sctp/ulpevent.c +++ b/net/sctp/ulpevent.c @@ -366,9 +366,10 @@ struct sctp_ulpevent *sctp_ulpevent_make_peer_addr_change( * specification [SCTP] and any extensions for a list of possible * error formats. */ -struct sctp_ulpevent *sctp_ulpevent_make_remote_error( - const struct sctp_association *asoc, struct sctp_chunk *chunk, - __u16 flags, gfp_t gfp) +struct sctp_ulpevent * +sctp_ulpevent_make_remote_error(const struct sctp_association *asoc, + struct sctp_chunk *chunk, __u16 flags, + gfp_t gfp) { struct sctp_ulpevent *event; struct sctp_remote_error *sre; @@ -387,8 +388,7 @@ struct sctp_ulpevent *sctp_ulpevent_make_remote_error( /* Copy the skb to a new skb with room for us to prepend * notification with. */ - skb = skb_copy_expand(chunk->skb, sizeof(struct sctp_remote_error), - 0, gfp); + skb = skb_copy_expand(chunk->skb, sizeof(*sre), 0, gfp); /* Pull off the rest of the cause TLV from the chunk. */ skb_pull(chunk->skb, elen); @@ -399,62 +399,21 @@ struct sctp_ulpevent *sctp_ulpevent_make_remote_error( event = sctp_skb2event(skb); sctp_ulpevent_init(event, MSG_NOTIFICATION, skb->truesize); - sre = (struct sctp_remote_error *) - skb_push(skb, sizeof(struct sctp_remote_error)); + sre = (struct sctp_remote_error *) skb_push(skb, sizeof(*sre)); /* Trim the buffer to the right length. */ - skb_trim(skb, sizeof(struct sctp_remote_error) + elen); + skb_trim(skb, sizeof(*sre) + elen); - /* Socket Extensions for SCTP - * 5.3.1.3 SCTP_REMOTE_ERROR - * - * sre_type: - * It should be SCTP_REMOTE_ERROR. - */ + /* RFC6458, Section 6.1.3. SCTP_REMOTE_ERROR */ + memset(sre, 0, sizeof(*sre)); sre->sre_type = SCTP_REMOTE_ERROR; - - /* - * Socket Extensions for SCTP - * 5.3.1.3 SCTP_REMOTE_ERROR - * - * sre_flags: 16 bits (unsigned integer) - * Currently unused. - */ sre->sre_flags = 0; - - /* Socket Extensions for SCTP - * 5.3.1.3 SCTP_REMOTE_ERROR - * - * sre_length: sizeof (__u32) - * - * This field is the total length of the notification data, - * including the notification header. - */ sre->sre_length = skb->len; - - /* Socket Extensions for SCTP - * 5.3.1.3 SCTP_REMOTE_ERROR - * - * sre_error: 16 bits (unsigned integer) - * This value represents one of the Operational Error causes defined in - * the SCTP specification, in network byte order. - */ sre->sre_error = cause; - - /* Socket Extensions for SCTP - * 5.3.1.3 SCTP_REMOTE_ERROR - * - * sre_assoc_id: sizeof (sctp_assoc_t) - * - * The association id field, holds the identifier for the association. - * All notifications for a given association have the same association - * identifier. For TCP style socket, this field is ignored. - */ sctp_ulpevent_set_owner(event, asoc); sre->sre_assoc_id = sctp_assoc2id(asoc); return event; - fail: return NULL; } @@ -899,7 +858,9 @@ __u16 sctp_ulpevent_get_notification_type(const struct sctp_ulpevent *event) return notification->sn_header.sn_type; } -/* Copy out the sndrcvinfo into a msghdr. */ +/* RFC6458, Section 5.3.2. SCTP Header Information Structure + * (SCTP_SNDRCV, DEPRECATED) + */ void sctp_ulpevent_read_sndrcvinfo(const struct sctp_ulpevent *event, struct msghdr *msghdr) { @@ -908,74 +869,21 @@ void sctp_ulpevent_read_sndrcvinfo(const struct sctp_ulpevent *event, if (sctp_ulpevent_is_notification(event)) return; - /* Sockets API Extensions for SCTP - * Section 5.2.2 SCTP Header Information Structure (SCTP_SNDRCV) - * - * sinfo_stream: 16 bits (unsigned integer) - * - * For recvmsg() the SCTP stack places the message's stream number in - * this value. - */ + memset(&sinfo, 0, sizeof(sinfo)); sinfo.sinfo_stream = event->stream; - /* sinfo_ssn: 16 bits (unsigned integer) - * - * For recvmsg() this value contains the stream sequence number that - * the remote endpoint placed in the DATA chunk. For fragmented - * messages this is the same number for all deliveries of the message - * (if more than one recvmsg() is needed to read the message). - */ sinfo.sinfo_ssn = event->ssn; - /* sinfo_ppid: 32 bits (unsigned integer) - * - * In recvmsg() this value is - * the same information that was passed by the upper layer in the peer - * application. Please note that byte order issues are NOT accounted - * for and this information is passed opaquely by the SCTP stack from - * one end to the other. - */ sinfo.sinfo_ppid = event->ppid; - /* sinfo_flags: 16 bits (unsigned integer) - * - * This field may contain any of the following flags and is composed of - * a bitwise OR of these values. - * - * recvmsg() flags: - * - * SCTP_UNORDERED - This flag is present when the message was sent - * non-ordered. - */ sinfo.sinfo_flags = event->flags; - /* sinfo_tsn: 32 bit (unsigned integer) - * - * For the receiving side, this field holds a TSN that was - * assigned to one of the SCTP Data Chunks. - */ sinfo.sinfo_tsn = event->tsn; - /* sinfo_cumtsn: 32 bit (unsigned integer) - * - * This field will hold the current cumulative TSN as - * known by the underlying SCTP layer. Note this field is - * ignored when sending and only valid for a receive - * operation when sinfo_flags are set to SCTP_UNORDERED. - */ sinfo.sinfo_cumtsn = event->cumtsn; - /* sinfo_assoc_id: sizeof (sctp_assoc_t) - * - * The association handle field, sinfo_assoc_id, holds the identifier - * for the association announced in the COMMUNICATION_UP notification. - * All notifications for a given association have the same identifier. - * Ignored for one-to-one style sockets. - */ sinfo.sinfo_assoc_id = sctp_assoc2id(event->asoc); - - /* context value that is set via SCTP_CONTEXT socket option. */ + /* Context value that is set via SCTP_CONTEXT socket option. */ sinfo.sinfo_context = event->asoc->default_rcv_context; - /* These fields are not used while receiving. */ sinfo.sinfo_timetolive = 0; put_cmsg(msghdr, IPPROTO_SCTP, SCTP_SNDRCV, - sizeof(struct sctp_sndrcvinfo), (void *)&sinfo); + sizeof(sinfo), &sinfo); } /* Do accounting for bytes received and hold a reference to the association From 03e01349c654fbdea80d3d9b4ab599244eb55bb7 Mon Sep 17 00:00:00 2001 From: Dave Chinner Date: Tue, 15 Jul 2014 07:28:41 +1000 Subject: [PATCH 179/268] xfs: null unused quota inodes when quota is on When quota is on, it is expected that unused quota inodes have a value of NULLFSINO. The changes to support a separate project quota in 3.12 broken this rule for non-project quota inode enabled filesystem, as the code now refuses to write the group quota inode if neither group or project quotas are enabled. This regression was introduced by commit d892d58 ("xfs: Start using pquotaino from the superblock"). In this case, we should be writing NULLFSINO rather than nothing to ensure that we leave the group quota inode in a valid state while quotas are enabled. Failure to do so doesn't cause a current kernel to break - the separate project quota inodes introduced translation code to always treat a zero inode as NULLFSINO. This was introduced by commit 0102629 ("xfs: Initialize all quota inodes to be NULLFSINO") with is also in 3.12 but older kernels do not do this and hence taking a filesystem back to an older kernel can result in quotas failing initialisation at mount time. When that happens, we see this in dmesg: [ 1649.215390] XFS (sdb): Mounting Filesystem [ 1649.316894] XFS (sdb): Failed to initialize disk quotas. [ 1649.316902] XFS (sdb): Ending clean mount By ensuring that we write NULLFSINO to quota inodes that aren't active, we avoid this problem. We have to be really careful when determining if the quota inodes are active or not, because we don't want to write a NULLFSINO if the quota inodes are active and we simply aren't updating them. Signed-off-by: Dave Chinner Reviewed-by: Brian Foster Signed-off-by: Dave Chinner --- fs/xfs/xfs_sb.c | 25 +++++++++++++++++++++---- 1 file changed, 21 insertions(+), 4 deletions(-) diff --git a/fs/xfs/xfs_sb.c b/fs/xfs/xfs_sb.c index c3453b11f5636..7703fa6770ff7 100644 --- a/fs/xfs/xfs_sb.c +++ b/fs/xfs/xfs_sb.c @@ -483,10 +483,16 @@ xfs_sb_quota_to_disk( } /* - * GQUOTINO and PQUOTINO cannot be used together in versions - * of superblock that do not have pquotino. from->sb_flags - * tells us which quota is active and should be copied to - * disk. + * GQUOTINO and PQUOTINO cannot be used together in versions of + * superblock that do not have pquotino. from->sb_flags tells us which + * quota is active and should be copied to disk. If neither are active, + * make sure we write NULLFSINO to the sb_gquotino field as a quota + * inode value of "0" is invalid when the XFS_SB_VERSION_QUOTA feature + * bit is set. + * + * Note that we don't need to handle the sb_uquotino or sb_pquotino here + * as they do not require any translation. Hence the main sb field loop + * will write them appropriately from the in-core superblock. */ if ((*fields & XFS_SB_GQUOTINO) && (from->sb_qflags & XFS_GQUOTA_ACCT)) @@ -494,6 +500,17 @@ xfs_sb_quota_to_disk( else if ((*fields & XFS_SB_PQUOTINO) && (from->sb_qflags & XFS_PQUOTA_ACCT)) to->sb_gquotino = cpu_to_be64(from->sb_pquotino); + else { + /* + * We can't rely on just the fields being logged to tell us + * that it is safe to write NULLFSINO - we should only do that + * if quotas are not actually enabled. Hence only write + * NULLFSINO if both in-core quota inodes are NULL. + */ + if (from->sb_gquotino == NULLFSINO && + from->sb_pquotino == NULLFSINO) + to->sb_gquotino = cpu_to_be64(NULLFSINO); + } *fields &= ~(XFS_SB_PQUOTINO | XFS_SB_GQUOTINO); } From 9ecf07a1d8f70f72ec99a0f102c8aa24609d84f4 Mon Sep 17 00:00:00 2001 From: Mathias Krause Date: Sat, 12 Jul 2014 22:36:44 +0200 Subject: [PATCH 180/268] neigh: sysctl - simplify address calculation of gc_* variables The code in neigh_sysctl_register() relies on a specific layout of struct neigh_table, namely that the 'gc_*' variables are directly following the 'parms' member in a specific order. The code, though, expresses this in the most ugly way. Get rid of the ugly casts and use the 'tbl' pointer to get a handle to the table. This way we can refer to the 'gc_*' variables directly. Similarly seen in the grsecurity patch, written by Brad Spengler. Signed-off-by: Mathias Krause Cc: Brad Spengler Signed-off-by: David S. Miller --- include/net/neighbour.h | 1 - net/core/neighbour.c | 9 +++++---- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/include/net/neighbour.h b/include/net/neighbour.h index 7277caf3743d2..47f425464f847 100644 --- a/include/net/neighbour.h +++ b/include/net/neighbour.h @@ -203,7 +203,6 @@ struct neigh_table { void (*proxy_redo)(struct sk_buff *skb); char *id; struct neigh_parms parms; - /* HACK. gc_* should follow parms without a gap! */ int gc_interval; int gc_thresh1; int gc_thresh2; diff --git a/net/core/neighbour.c b/net/core/neighbour.c index 32d872eec7f5c..559890b0f0a2c 100644 --- a/net/core/neighbour.c +++ b/net/core/neighbour.c @@ -3059,11 +3059,12 @@ int neigh_sysctl_register(struct net_device *dev, struct neigh_parms *p, memset(&t->neigh_vars[NEIGH_VAR_GC_INTERVAL], 0, sizeof(t->neigh_vars[NEIGH_VAR_GC_INTERVAL])); } else { + struct neigh_table *tbl = p->tbl; dev_name_source = "default"; - t->neigh_vars[NEIGH_VAR_GC_INTERVAL].data = (int *)(p + 1); - t->neigh_vars[NEIGH_VAR_GC_THRESH1].data = (int *)(p + 1) + 1; - t->neigh_vars[NEIGH_VAR_GC_THRESH2].data = (int *)(p + 1) + 2; - t->neigh_vars[NEIGH_VAR_GC_THRESH3].data = (int *)(p + 1) + 3; + t->neigh_vars[NEIGH_VAR_GC_INTERVAL].data = &tbl->gc_interval; + t->neigh_vars[NEIGH_VAR_GC_THRESH1].data = &tbl->gc_thresh1; + t->neigh_vars[NEIGH_VAR_GC_THRESH2].data = &tbl->gc_thresh2; + t->neigh_vars[NEIGH_VAR_GC_THRESH3].data = &tbl->gc_thresh3; } if (handler) { From a8a3e41c67d24eb12f9ab9680cbb85e24fcd9711 Mon Sep 17 00:00:00 2001 From: Christoph Schulz Date: Sun, 13 Jul 2014 00:53:15 +0200 Subject: [PATCH 181/268] net: pppoe: use correct channel MTU when using Multilink PPP The PPP channel MTU is used with Multilink PPP when ppp_mp_explode() (see ppp_generic module) tries to determine how big a fragment might be. According to RFC 1661, the MTU excludes the 2-byte PPP protocol field, see the corresponding comment and code in ppp_mp_explode(): /* * hdrlen includes the 2-byte PPP protocol field, but the * MTU counts only the payload excluding the protocol field. * (RFC1661 Section 2) */ mtu = pch->chan->mtu - (hdrlen - 2); However, the pppoe module *does* include the PPP protocol field in the channel MTU, which is wrong as it causes the PPP payload to be 1-2 bytes too big under certain circumstances (one byte if PPP protocol compression is used, two otherwise), causing the generated Ethernet packets to be dropped. So the pppoe module has to subtract two bytes from the channel MTU. This error only manifests itself when using Multilink PPP, as otherwise the channel MTU is not used anywhere. In the following, I will describe how to reproduce this bug. We configure two pppd instances for multilink PPP over two PPPoE links, say eth2 and eth3, with a MTU of 1492 bytes for each link and a MRRU of 2976 bytes. (This MRRU is computed by adding the two link MTUs and subtracting the MP header twice, which is 4 bytes long.) The necessary pppd statements on both sides are "multilink mtu 1492 mru 1492 mrru 2976". On the client side, we additionally need "plugin rp-pppoe.so eth2" and "plugin rp-pppoe.so eth3", respectively; on the server side, we additionally need to start two pppoe-server instances to be able to establish two PPPoE sessions, one over eth2 and one over eth3. We set the MTU of the PPP network interface to the MRRU (2976) on both sides of the connection in order to make use of the higher bandwidth. (If we didn't do that, IP fragmentation would kick in, which we want to avoid.) Now we send a ICMPv4 echo request with a payload of 2948 bytes from client to server over the PPP link. This results in the following network packet: 2948 (echo payload) + 8 (ICMPv4 header) + 20 (IPv4 header) --------------------- 2976 (PPP payload) These 2976 bytes do not exceed the MTU of the PPP network interface, so the IP packet is not fragmented. Now the multilink PPP code in ppp_mp_explode() prepends one protocol byte (0x21 for IPv4), making the packet one byte bigger than the negotiated MRRU. So this packet would have to be divided in three fragments. But this does not happen as each link MTU is assumed to be two bytes larger. So this packet is diveded into two fragments only, one of size 1489 and one of size 1488. Now we have for that bigger fragment: 1489 (PPP payload) + 4 (MP header) + 2 (PPP protocol field for the MP payload (0x3d)) + 6 (PPPoE header) -------------------------- 1501 (Ethernet payload) This packet exceeds the link MTU and is discarded. If one configures the link MTU on the client side to 1501, one can see the discarded Ethernet frames with tcpdump running on the client. A ping -s 2948 -c 1 192.168.15.254 leads to the smaller fragment that is correctly received on the server side: (tcpdump -vvvne -i eth3 pppoes and ppp proto 0x3d) 52:54:00:ad:87:fd > 52:54:00:79:5c:d0, ethertype PPPoE S (0x8864), length 1514: PPPoE [ses 0x3] MLPPP (0x003d), length 1494: seq 0x000, Flags [end], length 1492 and to the bigger fragment that is not received on the server side: (tcpdump -vvvne -i eth2 pppoes and ppp proto 0x3d) 52:54:00:70:9e:89 > 52:54:00:5d:6f:b0, ethertype PPPoE S (0x8864), length 1515: PPPoE [ses 0x5] MLPPP (0x003d), length 1495: seq 0x000, Flags [begin], length 1493 With the patch below, we correctly obtain three fragments: 52:54:00:ad:87:fd > 52:54:00:79:5c:d0, ethertype PPPoE S (0x8864), length 1514: PPPoE [ses 0x1] MLPPP (0x003d), length 1494: seq 0x000, Flags [begin], length 1492 52:54:00:70:9e:89 > 52:54:00:5d:6f:b0, ethertype PPPoE S (0x8864), length 1514: PPPoE [ses 0x1] MLPPP (0x003d), length 1494: seq 0x000, Flags [none], length 1492 52:54:00:ad:87:fd > 52:54:00:79:5c:d0, ethertype PPPoE S (0x8864), length 27: PPPoE [ses 0x1] MLPPP (0x003d), length 7: seq 0x000, Flags [end], length 5 And the ICMPv4 echo request is successfully received at the server side: IP (tos 0x0, ttl 64, id 21925, offset 0, flags [DF], proto ICMP (1), length 2976) 192.168.222.2 > 192.168.15.254: ICMP echo request, id 30530, seq 0, length 2956 The bug was introduced in commit c9aa6895371b2a257401f59d3393c9f7ac5a8698 ("[PPPOE]: Advertise PPPoE MTU") from the very beginning. This patch applies to 3.10 upwards but the fix can be applied (with minor modifications) to kernels as old as 2.6.32. Signed-off-by: Christoph Schulz Signed-off-by: David S. Miller --- drivers/net/ppp/pppoe.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ppp/pppoe.c b/drivers/net/ppp/pppoe.c index 2ea7efd118577..6c9c16d76935f 100644 --- a/drivers/net/ppp/pppoe.c +++ b/drivers/net/ppp/pppoe.c @@ -675,7 +675,7 @@ static int pppoe_connect(struct socket *sock, struct sockaddr *uservaddr, po->chan.hdrlen = (sizeof(struct pppoe_hdr) + dev->hard_header_len); - po->chan.mtu = dev->mtu - sizeof(struct pppoe_hdr); + po->chan.mtu = dev->mtu - sizeof(struct pppoe_hdr) - 2; po->chan.private = sk; po->chan.ops = &pppoe_chan_ops; From 548d28bd0eac840d122b691279ce9f4ce6ecbfb6 Mon Sep 17 00:00:00 2001 From: Nikolay Aleksandrov Date: Sun, 13 Jul 2014 09:47:47 +0200 Subject: [PATCH 182/268] bonding: fix ad_select module param check Obvious copy/paste error when I converted the ad_select to the new option API. "lacp_rate" there should be "ad_select" so we can get the proper value. CC: Jay Vosburgh CC: Veaceslav Falico CC: Andy Gospodarek CC: David S. Miller Fixes: 9e5f5eebe765 ("bonding: convert ad_select to use the new option API") Reported-by: Karim Scheik Signed-off-by: Nikolay Aleksandrov Signed-off-by: David S. Miller --- drivers/net/bonding/bond_main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c index 3a451b6cd3d50..701f86cd59932 100644 --- a/drivers/net/bonding/bond_main.c +++ b/drivers/net/bonding/bond_main.c @@ -4068,7 +4068,7 @@ static int bond_check_params(struct bond_params *params) } if (ad_select) { - bond_opt_initstr(&newval, lacp_rate); + bond_opt_initstr(&newval, ad_select); valptr = bond_opt_parse(bond_opt_get(BOND_OPT_AD_SELECT), &newval); if (!valptr) { From 32b333fe99069d090a12cc106fd5ae1002fe642f Mon Sep 17 00:00:00 2001 From: Jason Wang Date: Mon, 14 Jul 2014 11:42:44 +0800 Subject: [PATCH 183/268] mlx4: mark napi id for gro_skb Napi id was not marked for gro_skb, this will lead rx busy loop won't work correctly since they stack never try to call low latency receive method because of a zero socket napi id. Fix this by marking napi id for gro_skb. The transaction rate of 1 byte netperf tcp_rr gets about 50% increased (from 20531.68 to 30610.88). Cc: Amir Vadai Signed-off-by: Jason Wang Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlx4/en_rx.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ethernet/mellanox/mlx4/en_rx.c b/drivers/net/ethernet/mellanox/mlx4/en_rx.c index 96724170308a3..5535862f27cc5 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_rx.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_rx.c @@ -783,6 +783,7 @@ int mlx4_en_process_rx_cq(struct net_device *dev, struct mlx4_en_cq *cq, int bud PKT_HASH_TYPE_L3); skb_record_rx_queue(gro_skb, cq->ring); + skb_mark_napi_id(gro_skb, &cq->napi); if (ring->hwtstamp_rx_filter == HWTSTAMP_FILTER_ALL) { timestamp = mlx4_en_get_cqe_ts(cqe); From 3916a3192793fd3c11f69d623ef0cdbdbf9ea10a Mon Sep 17 00:00:00 2001 From: Christoph Schulz Date: Mon, 14 Jul 2014 08:01:10 +0200 Subject: [PATCH 184/268] net: ppp: don't call sk_chk_filter twice Commit 568f194e8bd16c353ad50f9ab95d98b20578a39d ("net: ppp: use sk_unattached_filter api") causes sk_chk_filter() to be called twice when setting a PPP pass or active filter. This applies to both the generic PPP subsystem implemented by drivers/net/ppp/ppp_generic.c and the ISDN PPP subsystem implemented by drivers/isdn/i4l/isdn_ppp.c. The first call is from within get_filter(). The second one is through the call chain ppp_ioctl() or isdn_ppp_ioctl() --> sk_unattached_filter_create() --> __sk_prepare_filter() --> sk_chk_filter() The first call from within get_filter() should be deleted as get_filter() is called just before calling sk_unattached_filter_create() later on, which eventually calls sk_chk_filter() anyway. For 3.15.x, this proposed change is a bugfix rather than a pure optimization as in that branch, sk_chk_filter() may replace filter codes by other codes which are not recognized when executing sk_chk_filter() a second time. So with 3.15.x, if sk_chk_filter() is called twice, the second invocation may yield EINVAL (this depends on the filter codes found in the filter to be set, but because the replacement is done for frequently used codes, this is almost always the case). The net effect is that setting pass and/or active PPP filters does not work anymore, since sk_unattached_filter_create() always returns EINVAL due to the second call to sk_chk_filter(), regardless whether the filter was originally sane or not. Signed-off-by: Christoph Schulz Acked-by: Daniel Borkmann Signed-off-by: David S. Miller --- drivers/isdn/i4l/isdn_ppp.c | 8 +------- drivers/net/ppp/ppp_generic.c | 8 +------- 2 files changed, 2 insertions(+), 14 deletions(-) diff --git a/drivers/isdn/i4l/isdn_ppp.c b/drivers/isdn/i4l/isdn_ppp.c index 61ac632374460..a333b7f798d17 100644 --- a/drivers/isdn/i4l/isdn_ppp.c +++ b/drivers/isdn/i4l/isdn_ppp.c @@ -442,7 +442,7 @@ static int get_filter(void __user *arg, struct sock_filter **p) { struct sock_fprog uprog; struct sock_filter *code = NULL; - int len, err; + int len; if (copy_from_user(&uprog, arg, sizeof(uprog))) return -EFAULT; @@ -458,12 +458,6 @@ static int get_filter(void __user *arg, struct sock_filter **p) if (IS_ERR(code)) return PTR_ERR(code); - err = sk_chk_filter(code, uprog.len); - if (err) { - kfree(code); - return err; - } - *p = code; return uprog.len; } diff --git a/drivers/net/ppp/ppp_generic.c b/drivers/net/ppp/ppp_generic.c index 91d6c1272fcf0..e2f20f807de81 100644 --- a/drivers/net/ppp/ppp_generic.c +++ b/drivers/net/ppp/ppp_generic.c @@ -539,7 +539,7 @@ static int get_filter(void __user *arg, struct sock_filter **p) { struct sock_fprog uprog; struct sock_filter *code = NULL; - int len, err; + int len; if (copy_from_user(&uprog, arg, sizeof(uprog))) return -EFAULT; @@ -554,12 +554,6 @@ static int get_filter(void __user *arg, struct sock_filter **p) if (IS_ERR(code)) return PTR_ERR(code); - err = sk_chk_filter(code, uprog.len); - if (err) { - kfree(code); - return err; - } - *p = code; return uprog.len; } From 3cf521f7dc87c031617fd47e4b7aa2593c2f3daf Mon Sep 17 00:00:00 2001 From: Sasha Levin Date: Mon, 14 Jul 2014 17:02:31 -0700 Subject: [PATCH 185/268] net/l2tp: don't fall back on UDP [get|set]sockopt The l2tp [get|set]sockopt() code has fallen back to the UDP functions for socket option levels != SOL_PPPOL2TP since day one, but that has never actually worked, since the l2tp socket isn't an inet socket. As David Miller points out: "If we wanted this to work, it'd have to look up the tunnel and then use tunnel->sk, but I wonder how useful that would be" Since this can never have worked so nobody could possibly have depended on that functionality, just remove the broken code and return -EINVAL. Reported-by: Sasha Levin Acked-by: James Chapman Acked-by: David Miller Cc: Phil Turnbull Cc: Vegard Nossum Cc: Willy Tarreau Cc: stable@vger.kernel.org Signed-off-by: Linus Torvalds --- net/l2tp/l2tp_ppp.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/l2tp/l2tp_ppp.c b/net/l2tp/l2tp_ppp.c index 950909f04ee6a..13752d96275e8 100644 --- a/net/l2tp/l2tp_ppp.c +++ b/net/l2tp/l2tp_ppp.c @@ -1365,7 +1365,7 @@ static int pppol2tp_setsockopt(struct socket *sock, int level, int optname, int err; if (level != SOL_PPPOL2TP) - return udp_prot.setsockopt(sk, level, optname, optval, optlen); + return -EINVAL; if (optlen < sizeof(int)) return -EINVAL; @@ -1491,7 +1491,7 @@ static int pppol2tp_getsockopt(struct socket *sock, int level, int optname, struct pppol2tp_session *ps; if (level != SOL_PPPOL2TP) - return udp_prot.getsockopt(sk, level, optname, optval, optlen); + return -EINVAL; if (get_user(len, optlen)) return -EFAULT; From db4175ae2095634dbecd4c847da439f9c83e1b3b Mon Sep 17 00:00:00 2001 From: Antti Palosaari Date: Fri, 4 Jul 2014 05:44:39 -0300 Subject: [PATCH 186/268] [media] tda10071: force modulation to QPSK on DVB-S Only supported modulation for DVB-S is QPSK. Modulation parameter contains invalid value for DVB-S on some cases, which leads driver refusing tuning attempt. Due to that, hard code modulation to QPSK in case of DVB-S. Cc: stable@vger.kernel.org Signed-off-by: Antti Palosaari Signed-off-by: Mauro Carvalho Chehab --- drivers/media/dvb-frontends/tda10071.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/media/dvb-frontends/tda10071.c b/drivers/media/dvb-frontends/tda10071.c index 522fe00f5eee1..49874e76548b1 100644 --- a/drivers/media/dvb-frontends/tda10071.c +++ b/drivers/media/dvb-frontends/tda10071.c @@ -668,6 +668,7 @@ static int tda10071_set_frontend(struct dvb_frontend *fe) struct dtv_frontend_properties *c = &fe->dtv_property_cache; int ret, i; u8 mode, rolloff, pilot, inversion, div; + fe_modulation_t modulation; dev_dbg(&priv->i2c->dev, "%s: delivery_system=%d modulation=%d frequency=%d symbol_rate=%d inversion=%d pilot=%d rolloff=%d\n", @@ -702,10 +703,13 @@ static int tda10071_set_frontend(struct dvb_frontend *fe) switch (c->delivery_system) { case SYS_DVBS: + modulation = QPSK; rolloff = 0; pilot = 2; break; case SYS_DVBS2: + modulation = c->modulation; + switch (c->rolloff) { case ROLLOFF_20: rolloff = 2; @@ -750,7 +754,7 @@ static int tda10071_set_frontend(struct dvb_frontend *fe) for (i = 0, mode = 0xff; i < ARRAY_SIZE(TDA10071_MODCOD); i++) { if (c->delivery_system == TDA10071_MODCOD[i].delivery_system && - c->modulation == TDA10071_MODCOD[i].modulation && + modulation == TDA10071_MODCOD[i].modulation && c->fec_inner == TDA10071_MODCOD[i].fec) { mode = TDA10071_MODCOD[i].val; dev_dbg(&priv->i2c->dev, "%s: mode found=%02x\n", From bc760cdae9b67e689ed29c66c9c2d78d6f5f8c4b Mon Sep 17 00:00:00 2001 From: Antti Palosaari Date: Mon, 7 Jul 2014 09:05:15 -0300 Subject: [PATCH 187/268] [media] tda10071: add missing DVB-S2/PSK-8 FEC AUTO FEC AUTO is valid for PSK-8 modulation too. Add it. Signed-off-by: Antti Palosaari Signed-off-by: Mauro Carvalho Chehab --- drivers/media/dvb-frontends/tda10071_priv.h | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/media/dvb-frontends/tda10071_priv.h b/drivers/media/dvb-frontends/tda10071_priv.h index 4baf14bfb65a6..4204861927368 100644 --- a/drivers/media/dvb-frontends/tda10071_priv.h +++ b/drivers/media/dvb-frontends/tda10071_priv.h @@ -55,6 +55,7 @@ static struct tda10071_modcod { { SYS_DVBS2, QPSK, FEC_8_9, 0x0a }, { SYS_DVBS2, QPSK, FEC_9_10, 0x0b }, /* 8PSK */ + { SYS_DVBS2, PSK_8, FEC_AUTO, 0x00 }, { SYS_DVBS2, PSK_8, FEC_3_5, 0x0c }, { SYS_DVBS2, PSK_8, FEC_2_3, 0x0d }, { SYS_DVBS2, PSK_8, FEC_3_4, 0x0e }, From b32725e84c02b4d01472770b96d1b33737b23b6d Mon Sep 17 00:00:00 2001 From: Antti Palosaari Date: Mon, 7 Jul 2014 09:52:28 -0300 Subject: [PATCH 188/268] [media] tda10071: fix spec inversion reporting Inversion ON was reported as inversion OFF and vice versa. Signed-off-by: Antti Palosaari Signed-off-by: Mauro Carvalho Chehab --- drivers/media/dvb-frontends/tda10071.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/media/dvb-frontends/tda10071.c b/drivers/media/dvb-frontends/tda10071.c index 49874e76548b1..d590798a63674 100644 --- a/drivers/media/dvb-frontends/tda10071.c +++ b/drivers/media/dvb-frontends/tda10071.c @@ -838,10 +838,10 @@ static int tda10071_get_frontend(struct dvb_frontend *fe) switch ((buf[1] >> 0) & 0x01) { case 0: - c->inversion = INVERSION_OFF; + c->inversion = INVERSION_ON; break; case 1: - c->inversion = INVERSION_ON; + c->inversion = INVERSION_OFF; break; } From c2c1a6e5851fe354d39e5b7907c6c9d0a997ec16 Mon Sep 17 00:00:00 2001 From: Antti Palosaari Date: Tue, 8 Jul 2014 02:48:28 -0300 Subject: [PATCH 189/268] [media] tda10071: fix returned symbol rate calculation Detected symbol rate value was returned too small. Signed-off-by: Antti Palosaari Signed-off-by: Mauro Carvalho Chehab --- drivers/media/dvb-frontends/tda10071.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/media/dvb-frontends/tda10071.c b/drivers/media/dvb-frontends/tda10071.c index d590798a63674..9619be5d48271 100644 --- a/drivers/media/dvb-frontends/tda10071.c +++ b/drivers/media/dvb-frontends/tda10071.c @@ -860,7 +860,7 @@ static int tda10071_get_frontend(struct dvb_frontend *fe) if (ret) goto error; - c->symbol_rate = (buf[0] << 16) | (buf[1] << 8) | (buf[2] << 0); + c->symbol_rate = ((buf[0] << 16) | (buf[1] << 8) | (buf[2] << 0)) * 1000; return ret; error: From 242841d3d71191348f98310e2d2001e1001d8630 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Wed, 9 Jul 2014 06:20:44 -0300 Subject: [PATCH 190/268] [media] gspca_pac7302: Add new usb-id for Genius i-Look 317 Tested-and-reported-by: yullaw Cc: stable@vger.kernel.org Signed-off-by: Hans de Goede Signed-off-by: Mauro Carvalho Chehab --- drivers/media/usb/gspca/pac7302.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/media/usb/gspca/pac7302.c b/drivers/media/usb/gspca/pac7302.c index 2fd1c5e31a0f2..339adce7c7a50 100644 --- a/drivers/media/usb/gspca/pac7302.c +++ b/drivers/media/usb/gspca/pac7302.c @@ -928,6 +928,7 @@ static const struct usb_device_id device_table[] = { {USB_DEVICE(0x093a, 0x2620)}, {USB_DEVICE(0x093a, 0x2621)}, {USB_DEVICE(0x093a, 0x2622), .driver_info = FL_VFLIP}, + {USB_DEVICE(0x093a, 0x2623), .driver_info = FL_VFLIP}, {USB_DEVICE(0x093a, 0x2624), .driver_info = FL_VFLIP}, {USB_DEVICE(0x093a, 0x2625)}, {USB_DEVICE(0x093a, 0x2626)}, From 5c763edfe4879ffc3a87fef64f743d4b5497aabb Mon Sep 17 00:00:00 2001 From: Olivier Sobrie Date: Mon, 14 Jul 2014 12:08:49 +0200 Subject: [PATCH 191/268] hso: remove unused workqueue The workqueue "retry_unthrottle_workqueue" is not scheduled anywhere in the code. So, remove it. Signed-off-by: Olivier Sobrie Signed-off-by: David S. Miller --- drivers/net/usb/hso.c | 12 ------------ 1 file changed, 12 deletions(-) diff --git a/drivers/net/usb/hso.c b/drivers/net/usb/hso.c index a3a05869309df..9ca2b418a3eec 100644 --- a/drivers/net/usb/hso.c +++ b/drivers/net/usb/hso.c @@ -261,7 +261,6 @@ struct hso_serial { u16 curr_rx_urb_offset; u8 rx_urb_filled[MAX_RX_URBS]; struct tasklet_struct unthrottle_tasklet; - struct work_struct retry_unthrottle_workqueue; }; struct hso_device { @@ -1252,14 +1251,6 @@ static void hso_unthrottle(struct tty_struct *tty) tasklet_hi_schedule(&serial->unthrottle_tasklet); } -static void hso_unthrottle_workfunc(struct work_struct *work) -{ - struct hso_serial *serial = - container_of(work, struct hso_serial, - retry_unthrottle_workqueue); - hso_unthrottle_tasklet(serial); -} - /* open the requested serial port */ static int hso_serial_open(struct tty_struct *tty, struct file *filp) { @@ -1295,8 +1286,6 @@ static int hso_serial_open(struct tty_struct *tty, struct file *filp) tasklet_init(&serial->unthrottle_tasklet, (void (*)(unsigned long))hso_unthrottle_tasklet, (unsigned long)serial); - INIT_WORK(&serial->retry_unthrottle_workqueue, - hso_unthrottle_workfunc); result = hso_start_serial_device(serial->parent, GFP_KERNEL); if (result) { hso_stop_serial_device(serial->parent); @@ -1345,7 +1334,6 @@ static void hso_serial_close(struct tty_struct *tty, struct file *filp) if (!usb_gone) hso_stop_serial_device(serial->parent); tasklet_kill(&serial->unthrottle_tasklet); - cancel_work_sync(&serial->retry_unthrottle_workqueue); } if (!usb_gone) From 8f9818af4eaef1150282e18355aaea425474a411 Mon Sep 17 00:00:00 2001 From: Olivier Sobrie Date: Mon, 14 Jul 2014 12:08:50 +0200 Subject: [PATCH 192/268] hso: fix deadlock when receiving bursts of data When the module sends bursts of data, sometimes a deadlock happens in the hso driver when the tty buffer doesn't get the chance to be flushed quickly enough. Remove the endless while loop in function put_rxbuf_data() which is called by the urb completion handler. If there isn't enough room in the tty buffer, discards all the data received in the URB. Cc: David Miller Cc: David Laight Cc: One Thousand Gnomes Cc: Dan Williams Cc: Jan Dumon Signed-off-by: Olivier Sobrie Acked-by: Alan Cox Signed-off-by: David S. Miller --- drivers/net/usb/hso.c | 38 +++++++++++++++++--------------------- 1 file changed, 17 insertions(+), 21 deletions(-) diff --git a/drivers/net/usb/hso.c b/drivers/net/usb/hso.c index 9ca2b418a3eec..a4272ed62da86 100644 --- a/drivers/net/usb/hso.c +++ b/drivers/net/usb/hso.c @@ -258,7 +258,6 @@ struct hso_serial { * so as not to drop characters on the floor. */ int curr_rx_urb_idx; - u16 curr_rx_urb_offset; u8 rx_urb_filled[MAX_RX_URBS]; struct tasklet_struct unthrottle_tasklet; }; @@ -2001,8 +2000,7 @@ static void ctrl_callback(struct urb *urb) static int put_rxbuf_data(struct urb *urb, struct hso_serial *serial) { struct tty_struct *tty; - int write_length_remaining = 0; - int curr_write_len; + int count; /* Sanity check */ if (urb == NULL || serial == NULL) { @@ -2012,29 +2010,28 @@ static int put_rxbuf_data(struct urb *urb, struct hso_serial *serial) tty = tty_port_tty_get(&serial->port); + if (tty && test_bit(TTY_THROTTLED, &tty->flags)) { + tty_kref_put(tty); + return -1; + } + /* Push data to tty */ - write_length_remaining = urb->actual_length - - serial->curr_rx_urb_offset; D1("data to push to tty"); - while (write_length_remaining) { - if (tty && test_bit(TTY_THROTTLED, &tty->flags)) { - tty_kref_put(tty); - return -1; - } - curr_write_len = tty_insert_flip_string(&serial->port, - urb->transfer_buffer + serial->curr_rx_urb_offset, - write_length_remaining); - serial->curr_rx_urb_offset += curr_write_len; - write_length_remaining -= curr_write_len; + count = tty_buffer_request_room(&serial->port, urb->actual_length); + if (count >= urb->actual_length) { + tty_insert_flip_string(&serial->port, urb->transfer_buffer, + urb->actual_length); tty_flip_buffer_push(&serial->port); + } else { + dev_warn(&serial->parent->usb->dev, + "dropping data, %d bytes lost\n", urb->actual_length); } + tty_kref_put(tty); - if (write_length_remaining == 0) { - serial->curr_rx_urb_offset = 0; - serial->rx_urb_filled[hso_urb_to_index(serial, urb)] = 0; - } - return write_length_remaining; + serial->rx_urb_filled[hso_urb_to_index(serial, urb)] = 0; + + return 0; } @@ -2205,7 +2202,6 @@ static int hso_stop_serial_device(struct hso_device *hso_dev) } } serial->curr_rx_urb_idx = 0; - serial->curr_rx_urb_offset = 0; if (serial->tx_urb) usb_kill_urb(serial->tx_urb); From bb78e7a12aa4898901d44bcb9d418f4188594427 Mon Sep 17 00:00:00 2001 From: Martin Peres Date: Mon, 14 Jul 2014 11:12:56 +0200 Subject: [PATCH 193/268] drm/nouveau/therm: fix a potential deadlock in the therm monitoring code Signed-off-by: Martin Peres Tested-by: Stefan Ringel Signed-off-by: Ben Skeggs --- drivers/gpu/drm/nouveau/core/subdev/therm/temp.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/nouveau/core/subdev/therm/temp.c b/drivers/gpu/drm/nouveau/core/subdev/therm/temp.c index cfde9eb44ad01..6212537b90c5b 100644 --- a/drivers/gpu/drm/nouveau/core/subdev/therm/temp.c +++ b/drivers/gpu/drm/nouveau/core/subdev/therm/temp.c @@ -192,11 +192,11 @@ alarm_timer_callback(struct nouveau_alarm *alarm) nouveau_therm_threshold_hyst_polling(therm, &sensor->thrs_shutdown, NOUVEAU_THERM_THRS_SHUTDOWN); + spin_unlock_irqrestore(&priv->sensor.alarm_program_lock, flags); + /* schedule the next poll in one second */ if (therm->temp_get(therm) >= 0 && list_empty(&alarm->head)) - ptimer->alarm(ptimer, 1000 * 1000 * 1000, alarm); - - spin_unlock_irqrestore(&priv->sensor.alarm_program_lock, flags); + ptimer->alarm(ptimer, 1000000000ULL, alarm); } void From 4320f6b1d9db4ca912c5eb6ecb328b2e090e1586 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Tue, 15 Jul 2014 08:51:27 +0200 Subject: [PATCH 194/268] PM / sleep: Fix request_firmware() error at resume The commit [247bc037: PM / Sleep: Mitigate race between the freezer and request_firmware()] introduced the finer state control, but it also leads to a new bug; for example, a bug report regarding the firmware loading of intel BT device at suspend/resume: https://bugzilla.novell.com/show_bug.cgi?id=873790 The root cause seems to be a small window between the process resume and the clear of usermodehelper lock. The request_firmware() function checks the UMH lock and gives up when it's in UMH_DISABLE state. This is for avoiding the invalid f/w loading during suspend/resume phase. The problem is, however, that usermodehelper_enable() is called at the end of thaw_processes(). Thus, a thawed process in between can kick off the f/w loader code path (in this case, via btusb_setup_intel()) even before the call of usermodehelper_enable(). Then usermodehelper_read_trylock() returns an error and request_firmware() spews WARN_ON() in the end. This oneliner patch fixes the issue just by setting to UMH_FREEZING state again before restarting tasks, so that the call of request_firmware() will be blocked until the end of this function instead of returning an error. Fixes: 247bc0374254 (PM / Sleep: Mitigate race between the freezer and request_firmware()) Link: https://bugzilla.novell.com/show_bug.cgi?id=873790 Cc: 3.4+ # 3.4+ Signed-off-by: Takashi Iwai Signed-off-by: Rafael J. Wysocki --- kernel/power/process.c | 1 + 1 file changed, 1 insertion(+) diff --git a/kernel/power/process.c b/kernel/power/process.c index 0ca8d83e2369e..4ee194eb524b3 100644 --- a/kernel/power/process.c +++ b/kernel/power/process.c @@ -186,6 +186,7 @@ void thaw_processes(void) printk("Restarting tasks ... "); + __usermodehelper_set_disable_depth(UMH_FREEZING); thaw_workqueues(); read_lock(&tasklist_lock); From 653a3538f865d26350727df25397bee2bacde773 Mon Sep 17 00:00:00 2001 From: Zhang Rui Date: Tue, 15 Jul 2014 14:26:13 +0200 Subject: [PATCH 195/268] PM / sleep: fix freeze_ops NULL pointer dereferences This patch fixes a NULL pointer dereference issue introduced by commit 1f0b63866fc1 (ACPI / PM: Hold ACPI scan lock over the "freeze" sleep state). Fixes: 1f0b63866fc1 (ACPI / PM: Hold ACPI scan lock over the "freeze" sleep state) Link: http://marc.info/?l=linux-pm&m=140541182017443&w=2 Reported-and-tested-by: Alexander Stein Signed-off-by: Zhang Rui Signed-off-by: Rafael J. Wysocki --- kernel/power/suspend.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/kernel/power/suspend.c b/kernel/power/suspend.c index 4dd8822f732a2..ed35a4790afe1 100644 --- a/kernel/power/suspend.c +++ b/kernel/power/suspend.c @@ -306,7 +306,7 @@ int suspend_devices_and_enter(suspend_state_t state) error = suspend_ops->begin(state); if (error) goto Close; - } else if (state == PM_SUSPEND_FREEZE && freeze_ops->begin) { + } else if (state == PM_SUSPEND_FREEZE && freeze_ops && freeze_ops->begin) { error = freeze_ops->begin(); if (error) goto Close; @@ -335,7 +335,7 @@ int suspend_devices_and_enter(suspend_state_t state) Close: if (need_suspend_ops(state) && suspend_ops->end) suspend_ops->end(); - else if (state == PM_SUSPEND_FREEZE && freeze_ops->end) + else if (state == PM_SUSPEND_FREEZE && freeze_ops && freeze_ops->end) freeze_ops->end(); return error; From 4da63c6fc426023d1a20e45508c47d7d68c6a53d Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Tue, 15 Jul 2014 15:19:43 +0200 Subject: [PATCH 196/268] ALSA: hda - Fix broken PM due to incomplete i915 initialization When the initialization of Intel HDMI controller fails due to missing i915 kernel symbols (e.g. HD-audio is built in while i915 is module), the driver discontinues the probe. However, since the probe was done asynchronously, the driver object still remains, thus the relevant PM ops are still called at suspend/resume. This results in the bad access to the incomplete audio card object, eventually leads to Oops or stall at PM. This patch adds the missing checks of chip->init_failed flag at each PM callback in order to fix the problem above. Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=79561 Cc: Signed-off-by: Takashi Iwai --- sound/pci/hda/hda_intel.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/sound/pci/hda/hda_intel.c b/sound/pci/hda/hda_intel.c index d690c26a197c8..83cd19017cf38 100644 --- a/sound/pci/hda/hda_intel.c +++ b/sound/pci/hda/hda_intel.c @@ -596,7 +596,7 @@ static int azx_suspend(struct device *dev) struct azx *chip = card->private_data; struct azx_pcm *p; - if (chip->disabled) + if (chip->disabled || chip->init_failed) return 0; snd_power_change_state(card, SNDRV_CTL_POWER_D3hot); @@ -628,7 +628,7 @@ static int azx_resume(struct device *dev) struct snd_card *card = dev_get_drvdata(dev); struct azx *chip = card->private_data; - if (chip->disabled) + if (chip->disabled || chip->init_failed) return 0; if (chip->driver_caps & AZX_DCAPS_I915_POWERWELL) { @@ -665,7 +665,7 @@ static int azx_runtime_suspend(struct device *dev) struct snd_card *card = dev_get_drvdata(dev); struct azx *chip = card->private_data; - if (chip->disabled) + if (chip->disabled || chip->init_failed) return 0; if (!(chip->driver_caps & AZX_DCAPS_PM_RUNTIME)) @@ -692,7 +692,7 @@ static int azx_runtime_resume(struct device *dev) struct hda_codec *codec; int status; - if (chip->disabled) + if (chip->disabled || chip->init_failed) return 0; if (!(chip->driver_caps & AZX_DCAPS_PM_RUNTIME)) @@ -729,7 +729,7 @@ static int azx_runtime_idle(struct device *dev) struct snd_card *card = dev_get_drvdata(dev); struct azx *chip = card->private_data; - if (chip->disabled) + if (chip->disabled || chip->init_failed) return 0; if (!power_save_controller || From 8abfb8727f4a724d31f9ccfd8013fbd16d539445 Mon Sep 17 00:00:00 2001 From: "zhangwei(Jovi)" Date: Thu, 18 Jul 2013 16:31:05 +0800 Subject: [PATCH 197/268] tracing: Add ftrace_trace_stack into __trace_puts/__trace_bputs Currently trace option stacktrace is not applicable for trace_printk with constant string argument, the reason is in __trace_puts/__trace_bputs ftrace_trace_stack is missing. In contrast, when using trace_printk with non constant string argument(will call into __trace_printk/__trace_bprintk), then trace option stacktrace is workable, this inconstant result will confuses users a lot. Link: http://lkml.kernel.org/p/51E7A7C9.9040401@huawei.com Cc: stable@vger.kernel.org # 3.10+ Signed-off-by: zhangwei(Jovi) Signed-off-by: Steven Rostedt --- kernel/trace/trace.c | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index f243444a37729..a6ffc8918ddac 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -466,6 +466,9 @@ int __trace_puts(unsigned long ip, const char *str, int size) struct print_entry *entry; unsigned long irq_flags; int alloc; + int pc; + + pc = preempt_count(); if (unlikely(tracing_selftest_running || tracing_disabled)) return 0; @@ -475,7 +478,7 @@ int __trace_puts(unsigned long ip, const char *str, int size) local_save_flags(irq_flags); buffer = global_trace.trace_buffer.buffer; event = trace_buffer_lock_reserve(buffer, TRACE_PRINT, alloc, - irq_flags, preempt_count()); + irq_flags, pc); if (!event) return 0; @@ -492,6 +495,7 @@ int __trace_puts(unsigned long ip, const char *str, int size) entry->buf[size] = '\0'; __buffer_unlock_commit(buffer, event); + ftrace_trace_stack(buffer, irq_flags, 4, pc); return size; } @@ -509,6 +513,9 @@ int __trace_bputs(unsigned long ip, const char *str) struct bputs_entry *entry; unsigned long irq_flags; int size = sizeof(struct bputs_entry); + int pc; + + pc = preempt_count(); if (unlikely(tracing_selftest_running || tracing_disabled)) return 0; @@ -516,7 +523,7 @@ int __trace_bputs(unsigned long ip, const char *str) local_save_flags(irq_flags); buffer = global_trace.trace_buffer.buffer; event = trace_buffer_lock_reserve(buffer, TRACE_BPUTS, size, - irq_flags, preempt_count()); + irq_flags, pc); if (!event) return 0; @@ -525,6 +532,7 @@ int __trace_bputs(unsigned long ip, const char *str) entry->str = str; __buffer_unlock_commit(buffer, event); + ftrace_trace_stack(buffer, irq_flags, 4, pc); return 1; } From 5f8bf2d263a20b986225ae1ed7d6759dc4b93af9 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Red Hat)" Date: Tue, 15 Jul 2014 11:05:12 -0400 Subject: [PATCH 198/268] tracing: Fix graph tracer with stack tracer on other archs Running my ftrace tests on PowerPC, it failed the test that checks if function_graph tracer is affected by the stack tracer. It was. Looking into this, I found that the update_function_graph_func() must be called even if the trampoline function is not changed. This is because archs like PowerPC do not support ftrace_ops being passed by assembly and instead uses a helper function (what the trampoline function points to). Since this function is not changed even when multiple ftrace_ops are added to the code, the test that falls out before calling update_function_graph_func() will miss that the update must still be done. Call update_function_graph_function() for all calls to update_ftrace_function() Cc: stable@vger.kernel.org # 3.3+ Signed-off-by: Steven Rostedt --- kernel/trace/ftrace.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index 5b372e3ed675c..ac9d1dad630b3 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -265,12 +265,12 @@ static void update_ftrace_function(void) func = ftrace_ops_list_func; } + update_function_graph_func(); + /* If there's no change, then do nothing more here */ if (ftrace_trace_function == func) return; - update_function_graph_func(); - /* * If we are using the list function, it doesn't care * about the function_trace_ops. From eec7e1c16d2b65e38137686dd9b7e102c2150905 Mon Sep 17 00:00:00 2001 From: Alexey Asemov Date: Tue, 15 Jul 2014 10:28:42 +0400 Subject: [PATCH 199/268] libata: EH should handle AMNF error condition as a media error libata-eh.c should handle AMNF error condition (error byte bit 0, usually code 0x01) in libata-eh.c along with UNC as a media error so SCSI stack can handle it properly (translation code 0x01 is already present in libata-scsi.c) but was never passed down due to lack of handling in EH. While using linux-based machine (AMD 6550M-based notebook, PCI IDs for the controller are 1022:7801 subsys 1025:059d) and ddrescue to salvage data from failing hard drive (WD7500BPVT 2.5" 750G SATA2), I've found that pure AMNF 0x01 error code generates generic "device error" that is retried several times by SCSI stack instead of "media error" that is passed up to software. So we may assume deprecated AMNF error code is surely not dead yet, and it's better for it to be handled properly. As we may see it is used by modern enough devices, and used properly: drive returned AMNF only when IDs for track cannot be read completely due to dying head or positioning, otherwise it returned UNC(orrectables). Not handling it causes wrong generic error code ("device error") reporting down the stack, can damage failing drives further because of excessive retries, and slows salvaging down a lot. Also, there is handling code in libata-scsi.c for 0x01 AMNF error already. https://bugzilla.kernel.org/show_bug.cgi?id=80031 tj: Shortened $SUBJ and moved its content to the first paragraph. Signed-off-by: Alexey Asemov Signed-off-by: Tejun Heo --- drivers/ata/libata-eh.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/drivers/ata/libata-eh.c b/drivers/ata/libata-eh.c index 6760fc4e85b8c..dad83df555c49 100644 --- a/drivers/ata/libata-eh.c +++ b/drivers/ata/libata-eh.c @@ -1811,7 +1811,7 @@ static unsigned int ata_eh_analyze_tf(struct ata_queued_cmd *qc, case ATA_DEV_ATA: if (err & ATA_ICRC) qc->err_mask |= AC_ERR_ATA_BUS; - if (err & ATA_UNC) + if (err & (ATA_UNC | ATA_AMNF)) qc->err_mask |= AC_ERR_MEDIA; if (err & ATA_IDNF) qc->err_mask |= AC_ERR_INVALID; @@ -2556,11 +2556,12 @@ static void ata_eh_link_report(struct ata_link *link) } if (cmd->command != ATA_CMD_PACKET && - (res->feature & (ATA_ICRC | ATA_UNC | ATA_IDNF | - ATA_ABORTED))) - ata_dev_err(qc->dev, "error: { %s%s%s%s}\n", + (res->feature & (ATA_ICRC | ATA_UNC | ATA_AMNF | + ATA_IDNF | ATA_ABORTED))) + ata_dev_err(qc->dev, "error: { %s%s%s%s%s}\n", res->feature & ATA_ICRC ? "ICRC " : "", res->feature & ATA_UNC ? "UNC " : "", + res->feature & ATA_AMNF ? "AMNF " : "", res->feature & ATA_IDNF ? "IDNF " : "", res->feature & ATA_ABORTED ? "ABRT " : ""); #endif From f0160a5a2912267c02cfe692eac955c360de5fdf Mon Sep 17 00:00:00 2001 From: "zhangwei(Jovi)" Date: Thu, 18 Jul 2013 16:31:18 +0800 Subject: [PATCH 200/268] tracing: Add TRACE_ITER_PRINTK flag check in __trace_puts/__trace_bputs The TRACE_ITER_PRINTK check in __trace_puts/__trace_bputs is missing, so add it, to be consistent with __trace_printk/__trace_bprintk. Those functions are all called by the same function: trace_printk(). Link: http://lkml.kernel.org/p/51E7A7D6.8090900@huawei.com Cc: stable@vger.kernel.org # 3.11+ Signed-off-by: zhangwei(Jovi) Signed-off-by: Steven Rostedt --- kernel/trace/trace.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index a6ffc8918ddac..bda9621638ccc 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -468,6 +468,9 @@ int __trace_puts(unsigned long ip, const char *str, int size) int alloc; int pc; + if (!(trace_flags & TRACE_ITER_PRINTK)) + return 0; + pc = preempt_count(); if (unlikely(tracing_selftest_running || tracing_disabled)) @@ -515,6 +518,9 @@ int __trace_bputs(unsigned long ip, const char *str) int size = sizeof(struct bputs_entry); int pc; + if (!(trace_flags & TRACE_ITER_PRINTK)) + return 0; + pc = preempt_count(); if (unlikely(tracing_selftest_running || tracing_disabled)) From 9aec8629ec829fc9403788cd959e05dd87988bd1 Mon Sep 17 00:00:00 2001 From: Mike Snitzer Date: Mon, 14 Jul 2014 16:35:54 -0400 Subject: [PATCH 201/268] dm thin metadata: do not allow the data block size to change The block size for the thin-pool's data device must remained fixed for the life of the thin-pool. Disallow any attempt to change the thin-pool's data block size. It should be noted that attempting to change the data block size via thin-pool table reload will be ignored as a side-effect of the thin-pool handover that the thin-pool target does during thin-pool table reload. Here is an example outcome of attempting to load a thin-pool table that reduced the thin-pool's data block size from 1024K to 512K. Before: kernel: device-mapper: thin: 253:4: growing the data device from 204800 to 409600 blocks After: kernel: device-mapper: thin metadata: changing the data block size (from 2048 to 1024) is not supported kernel: device-mapper: table: 253:4: thin-pool: Error creating metadata object kernel: device-mapper: ioctl: error adding target to table Signed-off-by: Mike Snitzer Acked-by: Joe Thornber Cc: stable@vger.kernel.org --- drivers/md/dm-thin-metadata.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/drivers/md/dm-thin-metadata.c b/drivers/md/dm-thin-metadata.c index b086a945edcbc..e9d33ad59df5e 100644 --- a/drivers/md/dm-thin-metadata.c +++ b/drivers/md/dm-thin-metadata.c @@ -613,6 +613,15 @@ static int __open_metadata(struct dm_pool_metadata *pmd) disk_super = dm_block_data(sblock); + /* Verify the data block size hasn't changed */ + if (le32_to_cpu(disk_super->data_block_size) != pmd->data_block_size) { + DMERR("changing the data block size (from %u to %llu) is not supported", + le32_to_cpu(disk_super->data_block_size), + (unsigned long long)pmd->data_block_size); + r = -EINVAL; + goto bad_unlock_sblock; + } + r = __check_incompat_features(disk_super, pmd); if (r < 0) goto bad_unlock_sblock; From 048e5a07f282c57815b3901d4a68a77fa131ce0a Mon Sep 17 00:00:00 2001 From: Mike Snitzer Date: Mon, 14 Jul 2014 16:59:39 -0400 Subject: [PATCH 202/268] dm cache metadata: do not allow the data block size to change The block size for the dm-cache's data device must remained fixed for the life of the cache. Disallow any attempt to change the cache's data block size. Signed-off-by: Mike Snitzer Acked-by: Joe Thornber Cc: stable@vger.kernel.org --- drivers/md/dm-cache-metadata.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/drivers/md/dm-cache-metadata.c b/drivers/md/dm-cache-metadata.c index 4ead4ba606562..d2899e7eb3aaf 100644 --- a/drivers/md/dm-cache-metadata.c +++ b/drivers/md/dm-cache-metadata.c @@ -425,6 +425,15 @@ static int __open_metadata(struct dm_cache_metadata *cmd) disk_super = dm_block_data(sblock); + /* Verify the data block size hasn't changed */ + if (le32_to_cpu(disk_super->data_block_size) != cmd->data_block_size) { + DMERR("changing the data block size (from %u to %llu) is not supported", + le32_to_cpu(disk_super->data_block_size), + (unsigned long long)cmd->data_block_size); + r = -EINVAL; + goto bad; + } + r = __check_incompat_features(disk_super, cmd); if (r < 0) goto bad; From d68aab6b8f572406aa93b45ef6483934dd3b54a6 Mon Sep 17 00:00:00 2001 From: Niu Yawei Date: Wed, 4 Jun 2014 12:22:13 +0800 Subject: [PATCH 203/268] quota: missing lock in dqcache_shrink_scan() Commit 1ab6c4997e04 (fs: convert fs shrinkers to new scan/count API) accidentally removed locking from quota shrinker. Fix it - dqcache_shrink_scan() should use dq_list_lock to protect the scan on free_dquots list. CC: stable@vger.kernel.org Fixes: 1ab6c4997e04a00c50c6d786c2f046adc0d1f5de Signed-off-by: Niu Yawei Signed-off-by: Jan Kara --- fs/quota/dquot.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/fs/quota/dquot.c b/fs/quota/dquot.c index 9cd5f63715c0e..7f30bdc57d13b 100644 --- a/fs/quota/dquot.c +++ b/fs/quota/dquot.c @@ -702,6 +702,7 @@ dqcache_shrink_scan(struct shrinker *shrink, struct shrink_control *sc) struct dquot *dquot; unsigned long freed = 0; + spin_lock(&dq_list_lock); head = free_dquots.prev; while (head != &free_dquots && sc->nr_to_scan) { dquot = list_entry(head, struct dquot, dq_free); @@ -713,6 +714,7 @@ dqcache_shrink_scan(struct shrinker *shrink, struct shrink_control *sc) freed++; head = free_dquots.prev; } + spin_unlock(&dq_list_lock); return freed; } From 97b8ee845393701edc06e27ccec2876ff9596019 Mon Sep 17 00:00:00 2001 From: Martin Lau Date: Mon, 9 Jun 2014 23:06:42 -0700 Subject: [PATCH 204/268] ring-buffer: Fix polling on trace_pipe ring_buffer_poll_wait() should always put the poll_table to its wait_queue even there is immediate data available. Otherwise, the following epoll and read sequence will eventually hang forever: 1. Put some data to make the trace_pipe ring_buffer read ready first 2. epoll_ctl(efd, EPOLL_CTL_ADD, trace_pipe_fd, ee) 3. epoll_wait() 4. read(trace_pipe_fd) till EAGAIN 5. Add some more data to the trace_pipe ring_buffer 6. epoll_wait() -> this epoll_wait() will block forever ~ During the epoll_ctl(efd, EPOLL_CTL_ADD,...) call in step 2, ring_buffer_poll_wait() returns immediately without adding poll_table, which has poll_table->_qproc pointing to ep_poll_callback(), to its wait_queue. ~ During the epoll_wait() call in step 3 and step 6, ring_buffer_poll_wait() cannot add ep_poll_callback() to its wait_queue because the poll_table->_qproc is NULL and it is how epoll works. ~ When there is new data available in step 6, ring_buffer does not know it has to call ep_poll_callback() because it is not in its wait queue. Hence, block forever. Other poll implementation seems to call poll_wait() unconditionally as the very first thing to do. For example, tcp_poll() in tcp.c. Link: http://lkml.kernel.org/p/20140610060637.GA14045@devbig242.prn2.facebook.com Cc: stable@vger.kernel.org # 2.6.27 Fixes: 2a2cc8f7c4d0 "ftrace: allow the event pipe to be polled" Reviewed-by: Chris Mason Signed-off-by: Martin Lau Signed-off-by: Steven Rostedt --- kernel/trace/ring_buffer.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c index 7c56c3d069430..ff7027199a9a3 100644 --- a/kernel/trace/ring_buffer.c +++ b/kernel/trace/ring_buffer.c @@ -616,10 +616,6 @@ int ring_buffer_poll_wait(struct ring_buffer *buffer, int cpu, struct ring_buffer_per_cpu *cpu_buffer; struct rb_irq_work *work; - if ((cpu == RING_BUFFER_ALL_CPUS && !ring_buffer_empty(buffer)) || - (cpu != RING_BUFFER_ALL_CPUS && !ring_buffer_empty_cpu(buffer, cpu))) - return POLLIN | POLLRDNORM; - if (cpu == RING_BUFFER_ALL_CPUS) work = &buffer->irq_work; else { From 44305ebde243a7cce2c592cc89afe5041d8bf884 Mon Sep 17 00:00:00 2001 From: Brian Norris Date: Tue, 20 May 2014 22:35:38 -0700 Subject: [PATCH 205/268] UBI: fastmap: do not miss bit-flips The return value from 'ubi_io_read_ec_hdr()' was stored in 'err', not in 'ret'. This fix makes sure Fastmap-enabled UBI does not miss bit-flip while reading EC headers, events and scrubs the affected PEBs. This issue was reported by Coverity Scan. Artem: improved the commit message. Signed-off-by: Brian Norris Acked-by: Richard Weinberger Signed-off-by: Artem Bityutskiy --- drivers/mtd/ubi/fastmap.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/mtd/ubi/fastmap.c b/drivers/mtd/ubi/fastmap.c index 72f39daee649e..0431b46d9fd9e 100644 --- a/drivers/mtd/ubi/fastmap.c +++ b/drivers/mtd/ubi/fastmap.c @@ -423,7 +423,7 @@ static int scan_pool(struct ubi_device *ubi, struct ubi_attach_info *ai, pnum, err); ret = err > 0 ? UBI_BAD_FASTMAP : err; goto out; - } else if (ret == UBI_IO_BITFLIPS) + } else if (err == UBI_IO_BITFLIPS) scrub = 1; /* From 4a36b44c77515ca1ad799577d3f9e2fa4d68bffa Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Wed, 18 Jun 2014 12:32:19 +0200 Subject: [PATCH 206/268] s390: require mvcos facility, not tod clock steering facility Inlined uaccess functions require the mvcos facility (bit 27), not the tod clock steering facility (bit 28) for z10 and newer machines. Signed-off-by: David Hildenbrand Signed-off-by: Heiko Carstens Signed-off-by: Martin Schwidefsky --- arch/s390/kernel/head.S | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/s390/kernel/head.S b/arch/s390/kernel/head.S index 7ba7d6784510c..e88d35d749501 100644 --- a/arch/s390/kernel/head.S +++ b/arch/s390/kernel/head.S @@ -437,11 +437,11 @@ ENTRY(startup_kdump) #if defined(CONFIG_64BIT) #if defined(CONFIG_MARCH_ZEC12) - .long 3, 0xc100efea, 0xf46ce800, 0x00400000 + .long 3, 0xc100eff2, 0xf46ce800, 0x00400000 #elif defined(CONFIG_MARCH_Z196) - .long 2, 0xc100efea, 0xf46c0000 + .long 2, 0xc100eff2, 0xf46c0000 #elif defined(CONFIG_MARCH_Z10) - .long 2, 0xc100efea, 0xf0680000 + .long 2, 0xc100eff2, 0xf0680000 #elif defined(CONFIG_MARCH_Z9_109) .long 1, 0xc100efc2 #elif defined(CONFIG_MARCH_Z990) From 7cbe4afe854a9a352d9562106449bc55d17d5e5b Mon Sep 17 00:00:00 2001 From: Martin Schwidefsky Date: Thu, 26 Jun 2014 10:48:56 +0200 Subject: [PATCH 207/268] s390/3270: correct size detection with the read-partition command The size detection for 3270 terminals with the read-partition command is broken. The raw3270_reset_device_cb function clears the init_data array, but if raw3270_writesf_readpart has been called the read-partition command is queued which needs the init_data array. In this case the size detection will fail and the invalid command does funny things to the terminal. Signed-off-by: Martin Schwidefsky --- drivers/s390/char/raw3270.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/s390/char/raw3270.c b/drivers/s390/char/raw3270.c index 15b3459f86562..220acb4cbee52 100644 --- a/drivers/s390/char/raw3270.c +++ b/drivers/s390/char/raw3270.c @@ -633,7 +633,6 @@ raw3270_reset_device_cb(struct raw3270_request *rq, void *data) } else raw3270_writesf_readpart(rp); memset(&rp->init_reset, 0, sizeof(rp->init_reset)); - memset(&rp->init_data, 0, sizeof(rp->init_data)); } static int From 8fb878c5f12bf7fd6099d466139bd4564418e583 Mon Sep 17 00:00:00 2001 From: Yijing Wang Date: Tue, 8 Jul 2014 10:08:05 +0800 Subject: [PATCH 208/268] s390/MSI: Use standard mask and unmask funtions MSI irqchip in s390 has its own mask and unmask MSI irq functions, zpci_enable_irq() and zpci_disable_irq(). They mask and unmask MSI irq in standard ways, no arch special. MSI driver provides two global standard functions mask_msi_irq() and unmask_msi_irq(). Local zpci_enable_irq() and zpci_disable_irq() are almost the same as the standard two. the difference is local mask/unmask functions read the mask status before mask and unmask everytime. Then change the value and rewrite to hardware. In standard functions, save the mask status after mask and unmask msi irq, and use the cached status to change the mask status. When we mask or unmask a MSI irq, we always cache its mask status except we know need not to cache it, like in pci_msi_shutdown. So use the standard functions to replace the local is safe. Signed-off-by: Yijing Wang [sebott: fixed inverted function pointers] Signed-off-by: Sebastian Ott Signed-off-by: Martin Schwidefsky --- arch/s390/pci/pci.c | 49 ++++++--------------------------------------- 1 file changed, 6 insertions(+), 43 deletions(-) diff --git a/arch/s390/pci/pci.c b/arch/s390/pci/pci.c index 9ddc51eeb8d69..30de42730b2f2 100644 --- a/arch/s390/pci/pci.c +++ b/arch/s390/pci/pci.c @@ -48,13 +48,10 @@ static LIST_HEAD(zpci_list); static DEFINE_SPINLOCK(zpci_list_lock); -static void zpci_enable_irq(struct irq_data *data); -static void zpci_disable_irq(struct irq_data *data); - static struct irq_chip zpci_irq_chip = { .name = "zPCI", - .irq_unmask = zpci_enable_irq, - .irq_mask = zpci_disable_irq, + .irq_unmask = unmask_msi_irq, + .irq_mask = mask_msi_irq, }; static DECLARE_BITMAP(zpci_domain, ZPCI_NR_DEVICES); @@ -244,43 +241,6 @@ static int zpci_cfg_store(struct zpci_dev *zdev, int offset, u32 val, u8 len) return rc; } -static int zpci_msi_set_mask_bits(struct msi_desc *msi, u32 mask, u32 flag) -{ - int offset, pos; - u32 mask_bits; - - if (msi->msi_attrib.is_msix) { - offset = msi->msi_attrib.entry_nr * PCI_MSIX_ENTRY_SIZE + - PCI_MSIX_ENTRY_VECTOR_CTRL; - msi->masked = readl(msi->mask_base + offset); - writel(flag, msi->mask_base + offset); - } else if (msi->msi_attrib.maskbit) { - pos = (long) msi->mask_base; - pci_read_config_dword(msi->dev, pos, &mask_bits); - mask_bits &= ~(mask); - mask_bits |= flag & mask; - pci_write_config_dword(msi->dev, pos, mask_bits); - } else - return 0; - - msi->msi_attrib.maskbit = !!flag; - return 1; -} - -static void zpci_enable_irq(struct irq_data *data) -{ - struct msi_desc *msi = irq_get_msi_desc(data->irq); - - zpci_msi_set_mask_bits(msi, 1, 0); -} - -static void zpci_disable_irq(struct irq_data *data) -{ - struct msi_desc *msi = irq_get_msi_desc(data->irq); - - zpci_msi_set_mask_bits(msi, 1, 1); -} - void pcibios_fixup_bus(struct pci_bus *bus) { } @@ -487,7 +447,10 @@ void arch_teardown_msi_irqs(struct pci_dev *pdev) /* Release MSI interrupts */ list_for_each_entry(msi, &pdev->msi_list, list) { - zpci_msi_set_mask_bits(msi, 1, 1); + if (msi->msi_attrib.is_msix) + default_msix_mask_irq(msi, 1); + else + default_msi_mask_irq(msi, 1, 1); irq_set_msi_desc(msi->irq, NULL); irq_free_desc(msi->irq); msi->msg.address_lo = 0; From dab6cf55f81a6e16b8147aed9a843e1691dcd318 Mon Sep 17 00:00:00 2001 From: Martin Schwidefsky Date: Mon, 23 Jun 2014 15:29:40 +0200 Subject: [PATCH 209/268] s390/ptrace: fix PSW mask check The PSW mask check of the PTRACE_POKEUSR_AREA command is incorrect. The PSW_MASK_USER define contains the PSW_MASK_ASC bits, the ptrace interface accepts all combinations for the address-space-control bits. To protect the kernel space the PSW mask check in ptrace needs to reject the address-space-control bit combination for home space. Fixes CVE-2014-3534 Cc: stable@vger.kernel.org Signed-off-by: Martin Schwidefsky --- arch/s390/kernel/ptrace.c | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/arch/s390/kernel/ptrace.c b/arch/s390/kernel/ptrace.c index 2d716734b5b1b..5dc7ad9e2fbf2 100644 --- a/arch/s390/kernel/ptrace.c +++ b/arch/s390/kernel/ptrace.c @@ -334,9 +334,14 @@ static int __poke_user(struct task_struct *child, addr_t addr, addr_t data) unsigned long mask = PSW_MASK_USER; mask |= is_ri_task(child) ? PSW_MASK_RI : 0; - if ((data & ~mask) != PSW_USER_BITS) + if ((data ^ PSW_USER_BITS) & ~mask) + /* Invalid psw mask. */ + return -EINVAL; + if ((data & PSW_MASK_ASC) == PSW_ASC_HOME) + /* Invalid address-space-control bits */ return -EINVAL; if ((data & PSW_MASK_EA) && !(data & PSW_MASK_BA)) + /* Invalid addressing mode bits */ return -EINVAL; } *(addr_t *)((addr_t) &task_pt_regs(child)->psw + addr) = data; @@ -672,9 +677,12 @@ static int __poke_user_compat(struct task_struct *child, mask |= is_ri_task(child) ? PSW32_MASK_RI : 0; /* Build a 64 bit psw mask from 31 bit mask. */ - if ((tmp & ~mask) != PSW32_USER_BITS) + if ((tmp ^ PSW32_USER_BITS) & ~mask) /* Invalid psw mask. */ return -EINVAL; + if ((data & PSW32_MASK_ASC) == PSW32_ASC_HOME) + /* Invalid address-space-control bits */ + return -EINVAL; regs->psw.mask = (regs->psw.mask & ~PSW_MASK_USER) | (regs->psw.mask & PSW_MASK_BA) | (__u64)(tmp & mask) << 32; From 666e68e0dde826ae146b980099f1719f74fa968c Mon Sep 17 00:00:00 2001 From: Ingo Tuchscherer Date: Mon, 14 Jul 2014 19:11:48 +0200 Subject: [PATCH 210/268] s390/zcrypt: improve device probing for zcrypt adapter cards Improve device probing process for zcrypt adapters to transmit service request during registration process. Signed-off-by: Ingo Tuchscherer Signed-off-by: Martin Schwidefsky --- drivers/s390/crypto/ap_bus.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/drivers/s390/crypto/ap_bus.c b/drivers/s390/crypto/ap_bus.c index 69ef4f8cfac8c..4038437ff033f 100644 --- a/drivers/s390/crypto/ap_bus.c +++ b/drivers/s390/crypto/ap_bus.c @@ -901,10 +901,15 @@ static int ap_device_probe(struct device *dev) int rc; ap_dev->drv = ap_drv; + + spin_lock_bh(&ap_device_list_lock); + list_add(&ap_dev->list, &ap_device_list); + spin_unlock_bh(&ap_device_list_lock); + rc = ap_drv->probe ? ap_drv->probe(ap_dev) : -ENODEV; - if (!rc) { + if (rc) { spin_lock_bh(&ap_device_list_lock); - list_add(&ap_dev->list, &ap_device_list); + list_del_init(&ap_dev->list); spin_unlock_bh(&ap_device_list_lock); } return rc; From 9f86745722d95bc7f855069bd82285bd10dc97ff Mon Sep 17 00:00:00 2001 From: Martin Schwidefsky Date: Tue, 15 Jul 2014 10:41:37 +0200 Subject: [PATCH 211/268] s390: fix restore of invalid floating-point-control The fixup of the inline assembly to restore the floating-point-control register needs to check for instruction address *after* the lfcp instruction as the specification and data exceptions are suppresssing. Signed-off-by: Martin Schwidefsky --- arch/s390/include/asm/switch_to.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/s390/include/asm/switch_to.h b/arch/s390/include/asm/switch_to.h index df38c70cd59ef..18ea9e3f81429 100644 --- a/arch/s390/include/asm/switch_to.h +++ b/arch/s390/include/asm/switch_to.h @@ -51,8 +51,8 @@ static inline int restore_fp_ctl(u32 *fpc) return 0; asm volatile( - "0: lfpc %1\n" - " la %0,0\n" + " lfpc %1\n" + "0: la %0,0\n" "1:\n" EX_TABLE(0b,1b) : "=d" (rc) : "Q" (*fpc), "0" (-EINVAL)); From d3f44fbabe55132832e152606365adb640296378 Mon Sep 17 00:00:00 2001 From: Paul Bolle Date: Mon, 30 Jun 2014 16:32:29 +0200 Subject: [PATCH 212/268] x86: Remove unused variable "polling" Compile tested. "polling" is unused since commit f80c5b39b80a ("sched/idle, x86: Switch from TS_POLLING to TIF_POLLING_NRFLAG"). Signed-off-by: Paul Bolle Signed-off-by: Peter Zijlstra Cc: Jiri Kosina Link: http://lkml.kernel.org/r/1404138749.2978.6.camel@x41 Signed-off-by: Ingo Molnar --- arch/x86/kernel/apm_32.c | 1 - 1 file changed, 1 deletion(-) diff --git a/arch/x86/kernel/apm_32.c b/arch/x86/kernel/apm_32.c index f3a1f04ed4cb8..5848744514142 100644 --- a/arch/x86/kernel/apm_32.c +++ b/arch/x86/kernel/apm_32.c @@ -841,7 +841,6 @@ static int apm_do_idle(void) u32 eax; u8 ret = 0; int idled = 0; - int polling; int err = 0; if (!need_resched()) { From 37e9562453b813d2ea527bd9531fef2c3c592847 Mon Sep 17 00:00:00 2001 From: Jason Low Date: Fri, 4 Jul 2014 20:49:32 -0700 Subject: [PATCH 213/268] locking/rwsem: Allow conservative optimistic spinning when readers have lock Commit 4fc828e24cd9 ("locking/rwsem: Support optimistic spinning") introduced a major performance regression for workloads such as xfs_repair which mix read and write locking of the mmap_sem across many threads. The result was xfs_repair ran 5x slower on 3.16-rc2 than on 3.15 and using 20x more system CPU time. Perf profiles indicate in some workloads that significant time can be spent spinning on !owner. This is because we don't set the lock owner when readers(s) obtain the rwsem. In this patch, we'll modify rwsem_can_spin_on_owner() such that we'll return false if there is no lock owner. The rationale is that if we just entered the slowpath, yet there is no lock owner, then there is a possibility that a reader has the lock. To be conservative, we'll avoid spinning in these situations. This patch reduced the total run time of the xfs_repair workload from about 4 minutes 24 seconds down to approximately 1 minute 26 seconds, back to close to the same performance as on 3.15. Retesting of AIM7, which were some of the workloads used to test the original optimistic spinning code, confirmed that we still get big performance gains with optimistic spinning, even with this additional regression fix. Davidlohr found that while the 'custom' workload took a performance hit of ~-14% to throughput for >300 users with this additional patch, the overall gain with optimistic spinning is still ~+45%. The 'disk' workload even improved by ~+15% at >1000 users. Tested-by: Dave Chinner Acked-by: Davidlohr Bueso Signed-off-by: Jason Low Signed-off-by: Peter Zijlstra Cc: Tim Chen Cc: Linus Torvalds Link: http://lkml.kernel.org/r/1404532172.2572.30.camel@j-VirtualBox Signed-off-by: Ingo Molnar --- kernel/locking/rwsem-xadd.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/kernel/locking/rwsem-xadd.c b/kernel/locking/rwsem-xadd.c index dacc32142fcca..c40c7d28661d1 100644 --- a/kernel/locking/rwsem-xadd.c +++ b/kernel/locking/rwsem-xadd.c @@ -285,10 +285,10 @@ static inline bool rwsem_try_write_lock_unqueued(struct rw_semaphore *sem) static inline bool rwsem_can_spin_on_owner(struct rw_semaphore *sem) { struct task_struct *owner; - bool on_cpu = true; + bool on_cpu = false; if (need_resched()) - return 0; + return false; rcu_read_lock(); owner = ACCESS_ONCE(sem->owner); @@ -297,9 +297,9 @@ static inline bool rwsem_can_spin_on_owner(struct rw_semaphore *sem) rcu_read_unlock(); /* - * If sem->owner is not set, the rwsem owner may have - * just acquired it and not set the owner yet or the rwsem - * has been released. + * If sem->owner is not set, yet we have just recently entered the + * slowpath, then there is a possibility reader(s) may have the lock. + * To be safe, avoid spinning in these situations. */ return on_cpu; } From 046a619d8e9746fa4c0e29e8c6b78e16efc008a8 Mon Sep 17 00:00:00 2001 From: Jason Low Date: Mon, 14 Jul 2014 10:27:48 -0700 Subject: [PATCH 214/268] locking/spinlocks/mcs: Rename optimistic_spin_queue() to optimistic_spin_node() Currently, the per-cpu nodes structure for the cancellable MCS spinlock is named "optimistic_spin_queue". However, in a follow up patch in the series we will be introducing a new structure that serves as the new "handle" for the lock. It would make more sense if that structure is named "optimistic_spin_queue". Additionally, since the current use of the "optimistic_spin_queue" structure are "nodes", it might be better if we rename them to "node" anyway. This preparatory patch renames all current "optimistic_spin_queue" to "optimistic_spin_node". Signed-off-by: Jason Low Signed-off-by: Peter Zijlstra Cc: Scott Norton Cc: "Paul E. McKenney" Cc: Dave Chinner Cc: Waiman Long Cc: Davidlohr Bueso Cc: Rik van Riel Cc: Andrew Morton Cc: "H. Peter Anvin" Cc: Steven Rostedt Cc: Tim Chen Cc: Konrad Rzeszutek Wilk Cc: Aswin Chandramouleeswaran Cc: Linus Torvalds Cc: Chris Mason Cc: Heiko Carstens Cc: Josef Bacik Link: http://lkml.kernel.org/r/1405358872-3732-2-git-send-email-jason.low2@hp.com Signed-off-by: Ingo Molnar --- include/linux/mutex.h | 4 ++-- include/linux/rwsem.h | 4 ++-- kernel/locking/mcs_spinlock.c | 24 ++++++++++++------------ kernel/locking/mcs_spinlock.h | 8 ++++---- 4 files changed, 20 insertions(+), 20 deletions(-) diff --git a/include/linux/mutex.h b/include/linux/mutex.h index 11692dea18aa2..885f3f56a77fc 100644 --- a/include/linux/mutex.h +++ b/include/linux/mutex.h @@ -46,7 +46,7 @@ * - detects multi-task circular deadlocks and prints out all affected * locks and tasks (and only those tasks) */ -struct optimistic_spin_queue; +struct optimistic_spin_node; struct mutex { /* 1: unlocked, 0: locked, negative: locked, possible waiters */ atomic_t count; @@ -56,7 +56,7 @@ struct mutex { struct task_struct *owner; #endif #ifdef CONFIG_MUTEX_SPIN_ON_OWNER - struct optimistic_spin_queue *osq; /* Spinner MCS lock */ + struct optimistic_spin_node *osq; /* Spinner MCS lock */ #endif #ifdef CONFIG_DEBUG_MUTEXES const char *name; diff --git a/include/linux/rwsem.h b/include/linux/rwsem.h index 8d79708146aa4..ba3f108ddea1e 100644 --- a/include/linux/rwsem.h +++ b/include/linux/rwsem.h @@ -16,7 +16,7 @@ #include -struct optimistic_spin_queue; +struct optimistic_spin_node; struct rw_semaphore; #ifdef CONFIG_RWSEM_GENERIC_SPINLOCK @@ -33,7 +33,7 @@ struct rw_semaphore { * if the owner is running on the cpu. */ struct task_struct *owner; - struct optimistic_spin_queue *osq; /* spinner MCS lock */ + struct optimistic_spin_node *osq; /* spinner MCS lock */ #endif #ifdef CONFIG_DEBUG_LOCK_ALLOC struct lockdep_map dep_map; diff --git a/kernel/locking/mcs_spinlock.c b/kernel/locking/mcs_spinlock.c index 838dc9e006697..e9866f70e828a 100644 --- a/kernel/locking/mcs_spinlock.c +++ b/kernel/locking/mcs_spinlock.c @@ -14,18 +14,18 @@ * called from interrupt context and we have preemption disabled while * spinning. */ -static DEFINE_PER_CPU_SHARED_ALIGNED(struct optimistic_spin_queue, osq_node); +static DEFINE_PER_CPU_SHARED_ALIGNED(struct optimistic_spin_node, osq_node); /* * Get a stable @node->next pointer, either for unlock() or unqueue() purposes. * Can return NULL in case we were the last queued and we updated @lock instead. */ -static inline struct optimistic_spin_queue * -osq_wait_next(struct optimistic_spin_queue **lock, - struct optimistic_spin_queue *node, - struct optimistic_spin_queue *prev) +static inline struct optimistic_spin_node * +osq_wait_next(struct optimistic_spin_node **lock, + struct optimistic_spin_node *node, + struct optimistic_spin_node *prev) { - struct optimistic_spin_queue *next = NULL; + struct optimistic_spin_node *next = NULL; for (;;) { if (*lock == node && cmpxchg(lock, node, prev) == node) { @@ -59,10 +59,10 @@ osq_wait_next(struct optimistic_spin_queue **lock, return next; } -bool osq_lock(struct optimistic_spin_queue **lock) +bool osq_lock(struct optimistic_spin_node **lock) { - struct optimistic_spin_queue *node = this_cpu_ptr(&osq_node); - struct optimistic_spin_queue *prev, *next; + struct optimistic_spin_node *node = this_cpu_ptr(&osq_node); + struct optimistic_spin_node *prev, *next; node->locked = 0; node->next = NULL; @@ -149,10 +149,10 @@ bool osq_lock(struct optimistic_spin_queue **lock) return false; } -void osq_unlock(struct optimistic_spin_queue **lock) +void osq_unlock(struct optimistic_spin_node **lock) { - struct optimistic_spin_queue *node = this_cpu_ptr(&osq_node); - struct optimistic_spin_queue *next; + struct optimistic_spin_node *node = this_cpu_ptr(&osq_node); + struct optimistic_spin_node *next; /* * Fast path for the uncontended case. diff --git a/kernel/locking/mcs_spinlock.h b/kernel/locking/mcs_spinlock.h index a2dbac4aca6b2..c99dc0052f492 100644 --- a/kernel/locking/mcs_spinlock.h +++ b/kernel/locking/mcs_spinlock.h @@ -118,12 +118,12 @@ void mcs_spin_unlock(struct mcs_spinlock **lock, struct mcs_spinlock *node) * mutex_lock()/rwsem_down_{read,write}() etc. */ -struct optimistic_spin_queue { - struct optimistic_spin_queue *next, *prev; +struct optimistic_spin_node { + struct optimistic_spin_node *next, *prev; int locked; /* 1 if lock acquired */ }; -extern bool osq_lock(struct optimistic_spin_queue **lock); -extern void osq_unlock(struct optimistic_spin_queue **lock); +extern bool osq_lock(struct optimistic_spin_node **lock); +extern void osq_unlock(struct optimistic_spin_node **lock); #endif /* __LINUX_MCS_SPINLOCK_H */ From 90631822c5d307b5410500806e8ac3e63928aa3e Mon Sep 17 00:00:00 2001 From: Jason Low Date: Mon, 14 Jul 2014 10:27:49 -0700 Subject: [PATCH 215/268] locking/spinlocks/mcs: Convert osq lock to atomic_t to reduce overhead The cancellable MCS spinlock is currently used to queue threads that are doing optimistic spinning. It uses per-cpu nodes, where a thread obtaining the lock would access and queue the local node corresponding to the CPU that it's running on. Currently, the cancellable MCS lock is implemented by using pointers to these nodes. In this patch, instead of operating on pointers to the per-cpu nodes, we store the CPU numbers in which the per-cpu nodes correspond to in atomic_t. A similar concept is used with the qspinlock. By operating on the CPU # of the nodes using atomic_t instead of pointers to those nodes, this can reduce the overhead of the cancellable MCS spinlock by 32 bits (on 64 bit systems). Signed-off-by: Jason Low Signed-off-by: Peter Zijlstra Cc: Scott Norton Cc: "Paul E. McKenney" Cc: Dave Chinner Cc: Waiman Long Cc: Davidlohr Bueso Cc: Rik van Riel Cc: Andrew Morton Cc: "H. Peter Anvin" Cc: Steven Rostedt Cc: Tim Chen Cc: Konrad Rzeszutek Wilk Cc: Aswin Chandramouleeswaran Cc: Linus Torvalds Cc: Chris Mason Cc: Heiko Carstens Cc: Josef Bacik Link: http://lkml.kernel.org/r/1405358872-3732-3-git-send-email-jason.low2@hp.com Signed-off-by: Ingo Molnar --- include/linux/mutex.h | 4 +-- include/linux/osq_lock.h | 19 +++++++++++++++ include/linux/rwsem.h | 7 +++--- kernel/locking/mcs_spinlock.c | 46 +++++++++++++++++++++++++++++------ kernel/locking/mcs_spinlock.h | 5 ++-- kernel/locking/mutex.c | 2 +- kernel/locking/rwsem-xadd.c | 2 +- 7 files changed, 68 insertions(+), 17 deletions(-) create mode 100644 include/linux/osq_lock.h diff --git a/include/linux/mutex.h b/include/linux/mutex.h index 885f3f56a77fc..42aa9b9ecd5f8 100644 --- a/include/linux/mutex.h +++ b/include/linux/mutex.h @@ -17,6 +17,7 @@ #include #include #include +#include /* * Simple, straightforward mutexes with strict semantics: @@ -46,7 +47,6 @@ * - detects multi-task circular deadlocks and prints out all affected * locks and tasks (and only those tasks) */ -struct optimistic_spin_node; struct mutex { /* 1: unlocked, 0: locked, negative: locked, possible waiters */ atomic_t count; @@ -56,7 +56,7 @@ struct mutex { struct task_struct *owner; #endif #ifdef CONFIG_MUTEX_SPIN_ON_OWNER - struct optimistic_spin_node *osq; /* Spinner MCS lock */ + struct optimistic_spin_queue osq; /* Spinner MCS lock */ #endif #ifdef CONFIG_DEBUG_MUTEXES const char *name; diff --git a/include/linux/osq_lock.h b/include/linux/osq_lock.h new file mode 100644 index 0000000000000..b001682bf7cbd --- /dev/null +++ b/include/linux/osq_lock.h @@ -0,0 +1,19 @@ +#ifndef __LINUX_OSQ_LOCK_H +#define __LINUX_OSQ_LOCK_H + +/* + * An MCS like lock especially tailored for optimistic spinning for sleeping + * lock implementations (mutex, rwsem, etc). + */ + +#define OSQ_UNLOCKED_VAL (0) + +struct optimistic_spin_queue { + /* + * Stores an encoded value of the CPU # of the tail node in the queue. + * If the queue is empty, then it's set to OSQ_UNLOCKED_VAL. + */ + atomic_t tail; +}; + +#endif diff --git a/include/linux/rwsem.h b/include/linux/rwsem.h index ba3f108ddea1e..9fdcdd03507d1 100644 --- a/include/linux/rwsem.h +++ b/include/linux/rwsem.h @@ -13,10 +13,9 @@ #include #include #include - #include +#include -struct optimistic_spin_node; struct rw_semaphore; #ifdef CONFIG_RWSEM_GENERIC_SPINLOCK @@ -33,7 +32,7 @@ struct rw_semaphore { * if the owner is running on the cpu. */ struct task_struct *owner; - struct optimistic_spin_node *osq; /* spinner MCS lock */ + struct optimistic_spin_queue osq; /* spinner MCS lock */ #endif #ifdef CONFIG_DEBUG_LOCK_ALLOC struct lockdep_map dep_map; @@ -70,7 +69,7 @@ static inline int rwsem_is_locked(struct rw_semaphore *sem) __RAW_SPIN_LOCK_UNLOCKED(name.wait_lock), \ LIST_HEAD_INIT((name).wait_list), \ NULL, /* owner */ \ - NULL /* mcs lock */ \ + { ATOMIC_INIT(OSQ_UNLOCKED_VAL) } /* osq */ \ __RWSEM_DEP_MAP_INIT(name) } #else #define __RWSEM_INITIALIZER(name) \ diff --git a/kernel/locking/mcs_spinlock.c b/kernel/locking/mcs_spinlock.c index e9866f70e828a..32fc16c0a5450 100644 --- a/kernel/locking/mcs_spinlock.c +++ b/kernel/locking/mcs_spinlock.c @@ -16,19 +16,45 @@ */ static DEFINE_PER_CPU_SHARED_ALIGNED(struct optimistic_spin_node, osq_node); +/* + * We use the value 0 to represent "no CPU", thus the encoded value + * will be the CPU number incremented by 1. + */ +static inline int encode_cpu(int cpu_nr) +{ + return cpu_nr + 1; +} + +static inline struct optimistic_spin_node *decode_cpu(int encoded_cpu_val) +{ + int cpu_nr = encoded_cpu_val - 1; + + return per_cpu_ptr(&osq_node, cpu_nr); +} + /* * Get a stable @node->next pointer, either for unlock() or unqueue() purposes. * Can return NULL in case we were the last queued and we updated @lock instead. */ static inline struct optimistic_spin_node * -osq_wait_next(struct optimistic_spin_node **lock, +osq_wait_next(struct optimistic_spin_queue *lock, struct optimistic_spin_node *node, struct optimistic_spin_node *prev) { struct optimistic_spin_node *next = NULL; + int curr = encode_cpu(smp_processor_id()); + int old; + + /* + * If there is a prev node in queue, then the 'old' value will be + * the prev node's CPU #, else it's set to OSQ_UNLOCKED_VAL since if + * we're currently last in queue, then the queue will then become empty. + */ + old = prev ? prev->cpu : OSQ_UNLOCKED_VAL; for (;;) { - if (*lock == node && cmpxchg(lock, node, prev) == node) { + if (atomic_read(&lock->tail) == curr && + atomic_cmpxchg(&lock->tail, curr, old) == curr) { /* * We were the last queued, we moved @lock back. @prev * will now observe @lock and will complete its @@ -59,18 +85,23 @@ osq_wait_next(struct optimistic_spin_node **lock, return next; } -bool osq_lock(struct optimistic_spin_node **lock) +bool osq_lock(struct optimistic_spin_queue *lock) { struct optimistic_spin_node *node = this_cpu_ptr(&osq_node); struct optimistic_spin_node *prev, *next; + int curr = encode_cpu(smp_processor_id()); + int old; node->locked = 0; node->next = NULL; + node->cpu = curr; - node->prev = prev = xchg(lock, node); - if (likely(prev == NULL)) + old = atomic_xchg(&lock->tail, curr); + if (old == OSQ_UNLOCKED_VAL) return true; + prev = decode_cpu(old); + node->prev = prev; ACCESS_ONCE(prev->next) = node; /* @@ -149,15 +180,16 @@ bool osq_lock(struct optimistic_spin_node **lock) return false; } -void osq_unlock(struct optimistic_spin_node **lock) +void osq_unlock(struct optimistic_spin_queue *lock) { struct optimistic_spin_node *node = this_cpu_ptr(&osq_node); struct optimistic_spin_node *next; + int curr = encode_cpu(smp_processor_id()); /* * Fast path for the uncontended case. */ - if (likely(cmpxchg(lock, node, NULL) == node)) + if (likely(atomic_cmpxchg(&lock->tail, curr, OSQ_UNLOCKED_VAL) == curr)) return; /* diff --git a/kernel/locking/mcs_spinlock.h b/kernel/locking/mcs_spinlock.h index c99dc0052f492..74356dc0ce298 100644 --- a/kernel/locking/mcs_spinlock.h +++ b/kernel/locking/mcs_spinlock.h @@ -121,9 +121,10 @@ void mcs_spin_unlock(struct mcs_spinlock **lock, struct mcs_spinlock *node) struct optimistic_spin_node { struct optimistic_spin_node *next, *prev; int locked; /* 1 if lock acquired */ + int cpu; /* encoded CPU # value */ }; -extern bool osq_lock(struct optimistic_spin_node **lock); -extern void osq_unlock(struct optimistic_spin_node **lock); +extern bool osq_lock(struct optimistic_spin_queue *lock); +extern void osq_unlock(struct optimistic_spin_queue *lock); #endif /* __LINUX_MCS_SPINLOCK_H */ diff --git a/kernel/locking/mutex.c b/kernel/locking/mutex.c index bc73d33c6760e..d9b313906caa8 100644 --- a/kernel/locking/mutex.c +++ b/kernel/locking/mutex.c @@ -60,7 +60,7 @@ __mutex_init(struct mutex *lock, const char *name, struct lock_class_key *key) INIT_LIST_HEAD(&lock->wait_list); mutex_clear_owner(lock); #ifdef CONFIG_MUTEX_SPIN_ON_OWNER - lock->osq = NULL; + atomic_set(&lock->osq.tail, OSQ_UNLOCKED_VAL); #endif debug_mutex_init(lock, name, key); diff --git a/kernel/locking/rwsem-xadd.c b/kernel/locking/rwsem-xadd.c index c40c7d28661d1..b77a6230bbf67 100644 --- a/kernel/locking/rwsem-xadd.c +++ b/kernel/locking/rwsem-xadd.c @@ -84,7 +84,7 @@ void __init_rwsem(struct rw_semaphore *sem, const char *name, INIT_LIST_HEAD(&sem->wait_list); #ifdef CONFIG_SMP sem->owner = NULL; - sem->osq = NULL; + atomic_set(&sem->osq.tail, OSQ_UNLOCKED_VAL); #endif } From 4d9d951e6b5df85ccfca2c5bd8b4f5c71d256b65 Mon Sep 17 00:00:00 2001 From: Jason Low Date: Mon, 14 Jul 2014 10:27:50 -0700 Subject: [PATCH 216/268] locking/spinlocks/mcs: Introduce and use init macro and function for osq locks Currently, we initialize the osq lock by directly setting the lock's values. It would be preferable if we use an init macro to do the initialization like we do with other locks. This patch introduces and uses a macro and function for initializing the osq lock. Signed-off-by: Jason Low Signed-off-by: Peter Zijlstra Cc: Scott Norton Cc: "Paul E. McKenney" Cc: Dave Chinner Cc: Waiman Long Cc: Davidlohr Bueso Cc: Rik van Riel Cc: Andrew Morton Cc: "H. Peter Anvin" Cc: Steven Rostedt Cc: Tim Chen Cc: Konrad Rzeszutek Wilk Cc: Aswin Chandramouleeswaran Cc: Linus Torvalds Cc: Chris Mason Cc: Josef Bacik Link: http://lkml.kernel.org/r/1405358872-3732-4-git-send-email-jason.low2@hp.com Signed-off-by: Ingo Molnar --- include/linux/osq_lock.h | 8 ++++++++ include/linux/rwsem.h | 2 +- kernel/locking/mutex.c | 2 +- kernel/locking/rwsem-xadd.c | 2 +- 4 files changed, 11 insertions(+), 3 deletions(-) diff --git a/include/linux/osq_lock.h b/include/linux/osq_lock.h index b001682bf7cbd..90230d5811c5a 100644 --- a/include/linux/osq_lock.h +++ b/include/linux/osq_lock.h @@ -16,4 +16,12 @@ struct optimistic_spin_queue { atomic_t tail; }; +/* Init macro and function. */ +#define OSQ_LOCK_UNLOCKED { ATOMIC_INIT(OSQ_UNLOCKED_VAL) } + +static inline void osq_lock_init(struct optimistic_spin_queue *lock) +{ + atomic_set(&lock->tail, OSQ_UNLOCKED_VAL); +} + #endif diff --git a/include/linux/rwsem.h b/include/linux/rwsem.h index 9fdcdd03507d1..25cd9aa2f3d72 100644 --- a/include/linux/rwsem.h +++ b/include/linux/rwsem.h @@ -69,7 +69,7 @@ static inline int rwsem_is_locked(struct rw_semaphore *sem) __RAW_SPIN_LOCK_UNLOCKED(name.wait_lock), \ LIST_HEAD_INIT((name).wait_list), \ NULL, /* owner */ \ - { ATOMIC_INIT(OSQ_UNLOCKED_VAL) } /* osq */ \ + OSQ_LOCK_UNLOCKED /* osq */ \ __RWSEM_DEP_MAP_INIT(name) } #else #define __RWSEM_INITIALIZER(name) \ diff --git a/kernel/locking/mutex.c b/kernel/locking/mutex.c index d9b313906caa8..acca2c1a3c5e5 100644 --- a/kernel/locking/mutex.c +++ b/kernel/locking/mutex.c @@ -60,7 +60,7 @@ __mutex_init(struct mutex *lock, const char *name, struct lock_class_key *key) INIT_LIST_HEAD(&lock->wait_list); mutex_clear_owner(lock); #ifdef CONFIG_MUTEX_SPIN_ON_OWNER - atomic_set(&lock->osq.tail, OSQ_UNLOCKED_VAL); + osq_lock_init(&lock->osq); #endif debug_mutex_init(lock, name, key); diff --git a/kernel/locking/rwsem-xadd.c b/kernel/locking/rwsem-xadd.c index b77a6230bbf67..7190592c26457 100644 --- a/kernel/locking/rwsem-xadd.c +++ b/kernel/locking/rwsem-xadd.c @@ -84,7 +84,7 @@ void __init_rwsem(struct rw_semaphore *sem, const char *name, INIT_LIST_HEAD(&sem->wait_list); #ifdef CONFIG_SMP sem->owner = NULL; - atomic_set(&sem->osq.tail, OSQ_UNLOCKED_VAL); + osq_lock_init(&sem->osq); #endif } From 33ecd2083a9560fbc1ef1b1279ef3ecb4c012a4f Mon Sep 17 00:00:00 2001 From: Jason Low Date: Mon, 14 Jul 2014 10:27:51 -0700 Subject: [PATCH 217/268] locking/spinlocks/mcs: Micro-optimize osq_unlock() In the unlock function of the cancellable MCS spinlock, the first thing we do is to retrive the current CPU's osq node. However, due to the changes made in the previous patch, in the common case where the lock is not contended, we wouldn't need to access the current CPU's osq node anymore. This patch optimizes this by only retriving this CPU's osq node after we attempt the initial cmpxchg to unlock the osq and found that its contended. Signed-off-by: Jason Low Signed-off-by: Peter Zijlstra Cc: Scott Norton Cc: "Paul E. McKenney" Cc: Dave Chinner Cc: Waiman Long Cc: Davidlohr Bueso Cc: Rik van Riel Cc: Andrew Morton Cc: "H. Peter Anvin" Cc: Steven Rostedt Cc: Tim Chen Cc: Konrad Rzeszutek Wilk Cc: Aswin Chandramouleeswaran Cc: Linus Torvalds Link: http://lkml.kernel.org/r/1405358872-3732-5-git-send-email-jason.low2@hp.com Signed-off-by: Ingo Molnar --- kernel/locking/mcs_spinlock.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/kernel/locking/mcs_spinlock.c b/kernel/locking/mcs_spinlock.c index 32fc16c0a5450..be9ee1559fca5 100644 --- a/kernel/locking/mcs_spinlock.c +++ b/kernel/locking/mcs_spinlock.c @@ -182,8 +182,7 @@ bool osq_lock(struct optimistic_spin_queue *lock) void osq_unlock(struct optimistic_spin_queue *lock) { - struct optimistic_spin_node *node = this_cpu_ptr(&osq_node); - struct optimistic_spin_node *next; + struct optimistic_spin_node *node, *next; int curr = encode_cpu(smp_processor_id()); /* @@ -195,6 +194,7 @@ void osq_unlock(struct optimistic_spin_queue *lock) /* * Second most likely case. */ + node = this_cpu_ptr(&osq_node); next = xchg(&node->next, NULL); if (next) { ACCESS_ONCE(next->locked) = 1; From b0ab99e7736af88b8ac1b7ae50ea287fffa2badc Mon Sep 17 00:00:00 2001 From: Mateusz Guzik Date: Sat, 14 Jun 2014 15:00:09 +0200 Subject: [PATCH 218/268] sched: Fix possible divide by zero in avg_atom() calculation proc_sched_show_task() does: if (nr_switches) do_div(avg_atom, nr_switches); nr_switches is unsigned long and do_div truncates it to 32 bits, which means it can test non-zero on e.g. x86-64 and be truncated to zero for division. Fix the problem by using div64_ul() instead. As a side effect calculations of avg_atom for big nr_switches are now correct. Signed-off-by: Mateusz Guzik Signed-off-by: Peter Zijlstra Cc: stable@vger.kernel.org Cc: Linus Torvalds Link: http://lkml.kernel.org/r/1402750809-31991-1-git-send-email-mguzik@redhat.com Signed-off-by: Ingo Molnar --- kernel/sched/debug.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/sched/debug.c b/kernel/sched/debug.c index 695f9773bb601..627b3c34b821d 100644 --- a/kernel/sched/debug.c +++ b/kernel/sched/debug.c @@ -608,7 +608,7 @@ void proc_sched_show_task(struct task_struct *p, struct seq_file *m) avg_atom = p->se.sum_exec_runtime; if (nr_switches) - do_div(avg_atom, nr_switches); + avg_atom = div64_ul(avg_atom, nr_switches); else avg_atom = -1LL; From 4cf465b579c20bee868464f5d664f8d2d96cd370 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Wed, 16 Jul 2014 13:28:34 +0200 Subject: [PATCH 219/268] ACPI / video: Add use_native_backlight quirk for HP ProBook 4540s As reported here: https://bugzilla.redhat.com/show_bug.cgi?id=1025690 This is yet another model which needs this quirk. Link: https://bugzilla.redhat.com/show_bug.cgi?id=1025690 Signed-off-by: Hans de Goede Signed-off-by: Rafael J. Wysocki --- drivers/acpi/video.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/drivers/acpi/video.c b/drivers/acpi/video.c index 29649c1948860..350d52a8f7811 100644 --- a/drivers/acpi/video.c +++ b/drivers/acpi/video.c @@ -581,6 +581,14 @@ static struct dmi_system_id video_dmi_table[] __initdata = { }, { .callback = video_set_use_native_backlight, + .ident = "HP ProBook 4540s", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "Hewlett-Packard"), + DMI_MATCH(DMI_PRODUCT_VERSION, "HP ProBook 4540s"), + }, + }, + { + .callback = video_set_use_native_backlight, .ident = "HP ProBook 2013 models", .matches = { DMI_MATCH(DMI_SYS_VENDOR, "Hewlett-Packard"), From 97d496bf1a7934c77f8bb0de9b773dd759def616 Mon Sep 17 00:00:00 2001 From: Linus Walleij Date: Sat, 12 Jul 2014 11:43:33 +0200 Subject: [PATCH 220/268] cpufreq: sa1110: set memory type for h3600 The Compaq iPAQ h3600 also has the K4S281632b-1H memory type. Verified by prying apart a broken board. Signed-off-by: Linus Walleij Acked-by: Viresh Kumar Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/sa1110-cpufreq.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/cpufreq/sa1110-cpufreq.c b/drivers/cpufreq/sa1110-cpufreq.c index 546376719d8f3..b5befc2111721 100644 --- a/drivers/cpufreq/sa1110-cpufreq.c +++ b/drivers/cpufreq/sa1110-cpufreq.c @@ -349,7 +349,7 @@ static int __init sa1110_clk_init(void) name = "K4S641632D"; if (machine_is_h3100()) name = "KM416S4030CT"; - if (machine_is_jornada720()) + if (machine_is_jornada720() || machine_is_h3600()) name = "K4S281632B-1H"; if (machine_is_nanoengine()) name = "MT48LC8M16A2TG-75"; From 7e02168711e70a93a4795870e028a260702bdcb5 Mon Sep 17 00:00:00 2001 From: Nicolas Del Piano Date: Sun, 13 Jul 2014 18:59:00 -0300 Subject: [PATCH 221/268] cpufreq: imx6q: Select PM_OPP PM_OPP is a library used by several of the existing cpufreq drivers. ARM IMX6Q cpufreq driver uses this library for its functionality. Thus, it should be selected in Kconfig. Reported-by: Ezequiel Garcia Signed-off-by: Nicolas Del Piano Acked-by: Viresh Kumar Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/Kconfig.arm | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/cpufreq/Kconfig.arm b/drivers/cpufreq/Kconfig.arm index ebac671150098..8c5cf4bdeab8f 100644 --- a/drivers/cpufreq/Kconfig.arm +++ b/drivers/cpufreq/Kconfig.arm @@ -104,6 +104,7 @@ config ARM_IMX6Q_CPUFREQ tristate "Freescale i.MX6 cpufreq support" depends on ARCH_MXC depends on REGULATOR_ANATOP + select PM_OPP help This adds cpufreq driver support for Freescale i.MX6 series SoCs. From 13b9a962a2594ee880c5d50d7f70964da1d4fe5a Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Wed, 16 Jul 2014 14:54:55 +0200 Subject: [PATCH 222/268] locking/rwsem: Rename 'activity' to 'count' There are two definitions of struct rw_semaphore, one in linux/rwsem.h and one in linux/rwsem-spinlock.h. For some reason they have different names for the initial field. This makes it impossible to use C99 named initialization for __RWSEM_INITIALIZER() -- or we have to duplicate that entire thing along with the structure definitions. The simpler patch is renaming the rwsem-spinlock variant to match the regular rwsem. This allows us to switch to C99 named initialization. Signed-off-by: Peter Zijlstra Cc: Linus Torvalds Link: http://lkml.kernel.org/n/tip-bmrZolsbGmautmzrerog27io@git.kernel.org Signed-off-by: Ingo Molnar --- include/linux/rwsem-spinlock.h | 8 ++++---- kernel/locking/rwsem-spinlock.c | 28 ++++++++++++++-------------- 2 files changed, 18 insertions(+), 18 deletions(-) diff --git a/include/linux/rwsem-spinlock.h b/include/linux/rwsem-spinlock.h index d5b13bc07a0b7..561e8615528d4 100644 --- a/include/linux/rwsem-spinlock.h +++ b/include/linux/rwsem-spinlock.h @@ -15,13 +15,13 @@ #ifdef __KERNEL__ /* * the rw-semaphore definition - * - if activity is 0 then there are no active readers or writers - * - if activity is +ve then that is the number of active readers - * - if activity is -1 then there is one active writer + * - if count is 0 then there are no active readers or writers + * - if count is +ve then that is the number of active readers + * - if count is -1 then there is one active writer * - if wait_list is not empty, then there are processes waiting for the semaphore */ struct rw_semaphore { - __s32 activity; + __s32 count; raw_spinlock_t wait_lock; struct list_head wait_list; #ifdef CONFIG_DEBUG_LOCK_ALLOC diff --git a/kernel/locking/rwsem-spinlock.c b/kernel/locking/rwsem-spinlock.c index 9be8a91449786..2c93571162cb7 100644 --- a/kernel/locking/rwsem-spinlock.c +++ b/kernel/locking/rwsem-spinlock.c @@ -26,7 +26,7 @@ int rwsem_is_locked(struct rw_semaphore *sem) unsigned long flags; if (raw_spin_trylock_irqsave(&sem->wait_lock, flags)) { - ret = (sem->activity != 0); + ret = (sem->count != 0); raw_spin_unlock_irqrestore(&sem->wait_lock, flags); } return ret; @@ -46,7 +46,7 @@ void __init_rwsem(struct rw_semaphore *sem, const char *name, debug_check_no_locks_freed((void *)sem, sizeof(*sem)); lockdep_init_map(&sem->dep_map, name, key, 0); #endif - sem->activity = 0; + sem->count = 0; raw_spin_lock_init(&sem->wait_lock); INIT_LIST_HEAD(&sem->wait_list); } @@ -95,7 +95,7 @@ __rwsem_do_wake(struct rw_semaphore *sem, int wakewrite) waiter = list_entry(next, struct rwsem_waiter, list); } while (waiter->type != RWSEM_WAITING_FOR_WRITE); - sem->activity += woken; + sem->count += woken; out: return sem; @@ -126,9 +126,9 @@ void __sched __down_read(struct rw_semaphore *sem) raw_spin_lock_irqsave(&sem->wait_lock, flags); - if (sem->activity >= 0 && list_empty(&sem->wait_list)) { + if (sem->count >= 0 && list_empty(&sem->wait_list)) { /* granted */ - sem->activity++; + sem->count++; raw_spin_unlock_irqrestore(&sem->wait_lock, flags); goto out; } @@ -170,9 +170,9 @@ int __down_read_trylock(struct rw_semaphore *sem) raw_spin_lock_irqsave(&sem->wait_lock, flags); - if (sem->activity >= 0 && list_empty(&sem->wait_list)) { + if (sem->count >= 0 && list_empty(&sem->wait_list)) { /* granted */ - sem->activity++; + sem->count++; ret = 1; } @@ -206,7 +206,7 @@ void __sched __down_write_nested(struct rw_semaphore *sem, int subclass) * itself into sleep and waiting for system woke it or someone * else in the head of the wait list up. */ - if (sem->activity == 0) + if (sem->count == 0) break; set_task_state(tsk, TASK_UNINTERRUPTIBLE); raw_spin_unlock_irqrestore(&sem->wait_lock, flags); @@ -214,7 +214,7 @@ void __sched __down_write_nested(struct rw_semaphore *sem, int subclass) raw_spin_lock_irqsave(&sem->wait_lock, flags); } /* got the lock */ - sem->activity = -1; + sem->count = -1; list_del(&waiter.list); raw_spin_unlock_irqrestore(&sem->wait_lock, flags); @@ -235,9 +235,9 @@ int __down_write_trylock(struct rw_semaphore *sem) raw_spin_lock_irqsave(&sem->wait_lock, flags); - if (sem->activity == 0) { + if (sem->count == 0) { /* got the lock */ - sem->activity = -1; + sem->count = -1; ret = 1; } @@ -255,7 +255,7 @@ void __up_read(struct rw_semaphore *sem) raw_spin_lock_irqsave(&sem->wait_lock, flags); - if (--sem->activity == 0 && !list_empty(&sem->wait_list)) + if (--sem->count == 0 && !list_empty(&sem->wait_list)) sem = __rwsem_wake_one_writer(sem); raw_spin_unlock_irqrestore(&sem->wait_lock, flags); @@ -270,7 +270,7 @@ void __up_write(struct rw_semaphore *sem) raw_spin_lock_irqsave(&sem->wait_lock, flags); - sem->activity = 0; + sem->count = 0; if (!list_empty(&sem->wait_list)) sem = __rwsem_do_wake(sem, 1); @@ -287,7 +287,7 @@ void __downgrade_write(struct rw_semaphore *sem) raw_spin_lock_irqsave(&sem->wait_lock, flags); - sem->activity = 1; + sem->count = 1; if (!list_empty(&sem->wait_list)) sem = __rwsem_do_wake(sem, 0); From ce069fc920e5734558b3d9cbef1ab06cf01ee793 Mon Sep 17 00:00:00 2001 From: Jason Low Date: Mon, 14 Jul 2014 10:27:52 -0700 Subject: [PATCH 223/268] locking/rwsem: Reduce the size of struct rw_semaphore Recent optimistic spinning additions to rwsem provide significant performance benefits on many workloads on large machines. The cost of it was increasing the size of the rwsem structure by up to 128 bits. However, now that the previous patches in this series bring the overhead of struct optimistic_spin_queue to 32 bits, this patch reorders some fields in struct rw_semaphore such that we can reduce the overhead of the rwsem structure by 64 bits (on 64 bit systems). The extra overhead required for rwsem optimistic spinning would now be up to 8 additional bytes instead of up to 16 bytes. Additionally, the size of rwsem would now be more in line with mutexes. Signed-off-by: Jason Low Signed-off-by: Peter Zijlstra Cc: Scott Norton Cc: "Paul E. McKenney" Cc: Dave Chinner Cc: Waiman Long Cc: Davidlohr Bueso Cc: Rik van Riel Cc: Andrew Morton Cc: "H. Peter Anvin" Cc: Steven Rostedt Cc: Tim Chen Cc: Konrad Rzeszutek Wilk Cc: Aswin Chandramouleeswaran Cc: Linus Torvalds Cc: Chris Mason Cc: Josef Bacik Link: http://lkml.kernel.org/r/1405358872-3732-6-git-send-email-jason.low2@hp.com Signed-off-by: Ingo Molnar --- include/linux/rwsem.h | 25 +++++++++++-------------- 1 file changed, 11 insertions(+), 14 deletions(-) diff --git a/include/linux/rwsem.h b/include/linux/rwsem.h index 25cd9aa2f3d72..716807f0eb2d6 100644 --- a/include/linux/rwsem.h +++ b/include/linux/rwsem.h @@ -24,15 +24,15 @@ struct rw_semaphore; /* All arch specific implementations share the same struct */ struct rw_semaphore { long count; - raw_spinlock_t wait_lock; struct list_head wait_list; + raw_spinlock_t wait_lock; #ifdef CONFIG_SMP + struct optimistic_spin_queue osq; /* spinner MCS lock */ /* * Write owner. Used as a speculative check to see * if the owner is running on the cpu. */ struct task_struct *owner; - struct optimistic_spin_queue osq; /* spinner MCS lock */ #endif #ifdef CONFIG_DEBUG_LOCK_ALLOC struct lockdep_map dep_map; @@ -64,21 +64,18 @@ static inline int rwsem_is_locked(struct rw_semaphore *sem) #endif #if defined(CONFIG_SMP) && !defined(CONFIG_RWSEM_GENERIC_SPINLOCK) -#define __RWSEM_INITIALIZER(name) \ - { RWSEM_UNLOCKED_VALUE, \ - __RAW_SPIN_LOCK_UNLOCKED(name.wait_lock), \ - LIST_HEAD_INIT((name).wait_list), \ - NULL, /* owner */ \ - OSQ_LOCK_UNLOCKED /* osq */ \ - __RWSEM_DEP_MAP_INIT(name) } +#define __RWSEM_OPT_INIT(lockname) , .osq = OSQ_LOCK_UNLOCKED, .owner = NULL #else -#define __RWSEM_INITIALIZER(name) \ - { RWSEM_UNLOCKED_VALUE, \ - __RAW_SPIN_LOCK_UNLOCKED(name.wait_lock), \ - LIST_HEAD_INIT((name).wait_list) \ - __RWSEM_DEP_MAP_INIT(name) } +#define __RWSEM_OPT_INIT(lockname) #endif +#define __RWSEM_INITIALIZER(name) \ + { .count = RWSEM_UNLOCKED_VALUE, \ + .wait_list = LIST_HEAD_INIT((name).wait_list), \ + .wait_lock = __RAW_SPIN_LOCK_UNLOCKED(name.wait_lock) \ + __RWSEM_OPT_INIT(name) \ + __RWSEM_DEP_MAP_INIT(name) } + #define DECLARE_RWSEM(name) \ struct rw_semaphore name = __RWSEM_INITIALIZER(name) From 4badad352a6bb202ec68afa7a574c0bb961e5ebc Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Fri, 6 Jun 2014 19:53:16 +0200 Subject: [PATCH 224/268] locking/mutex: Disable optimistic spinning on some architectures The optimistic spin code assumes regular stores and cmpxchg() play nice; this is found to not be true for at least: parisc, sparc32, tile32, metag-lock1, arc-!llsc and hexagon. There is further wreckage, but this in particular seemed easy to trigger, so blacklist this. Opt in for known good archs. Signed-off-by: Peter Zijlstra Reported-by: Mikulas Patocka Cc: David Miller Cc: Chris Metcalf Cc: James Bottomley Cc: Vineet Gupta Cc: Jason Low Cc: Waiman Long Cc: "James E.J. Bottomley" Cc: Paul McKenney Cc: John David Anglin Cc: James Hogan Cc: Linus Torvalds Cc: Davidlohr Bueso Cc: stable@vger.kernel.org Cc: Benjamin Herrenschmidt Cc: Catalin Marinas Cc: Russell King Cc: Will Deacon Cc: linux-arm-kernel@lists.infradead.org Cc: linux-kernel@vger.kernel.org Cc: linuxppc-dev@lists.ozlabs.org Cc: sparclinux@vger.kernel.org Link: http://lkml.kernel.org/r/20140606175316.GV13930@laptop.programming.kicks-ass.net Signed-off-by: Ingo Molnar --- arch/arm/Kconfig | 1 + arch/arm64/Kconfig | 1 + arch/powerpc/Kconfig | 1 + arch/sparc/Kconfig | 1 + arch/x86/Kconfig | 1 + kernel/Kconfig.locks | 5 ++++- 6 files changed, 9 insertions(+), 1 deletion(-) diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig index 245058b3b0ef7..88acf8bc1490a 100644 --- a/arch/arm/Kconfig +++ b/arch/arm/Kconfig @@ -6,6 +6,7 @@ config ARM select ARCH_HAS_TICK_BROADCAST if GENERIC_CLOCKEVENTS_BROADCAST select ARCH_HAVE_CUSTOM_GPIO_H select ARCH_MIGHT_HAVE_PC_PARPORT + select ARCH_SUPPORTS_ATOMIC_RMW select ARCH_USE_BUILTIN_BSWAP select ARCH_USE_CMPXCHG_LOCKREF select ARCH_WANT_IPC_PARSE_VERSION diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index a474de346be66..839f48c26ef02 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -4,6 +4,7 @@ config ARM64 select ARCH_HAS_OPP select ARCH_HAS_TICK_BROADCAST if GENERIC_CLOCKEVENTS_BROADCAST select ARCH_USE_CMPXCHG_LOCKREF + select ARCH_SUPPORTS_ATOMIC_RMW select ARCH_WANT_OPTIONAL_GPIOLIB select ARCH_WANT_COMPAT_IPC_PARSE_VERSION select ARCH_WANT_FRAME_POINTERS diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index fefe7c8bf05f5..80b94b0add1f4 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -145,6 +145,7 @@ config PPC select HAVE_IRQ_EXIT_ON_IRQ_STACK select ARCH_USE_CMPXCHG_LOCKREF if PPC64 select HAVE_ARCH_AUDITSYSCALL + select ARCH_SUPPORTS_ATOMIC_RMW config GENERIC_CSUM def_bool CPU_LITTLE_ENDIAN diff --git a/arch/sparc/Kconfig b/arch/sparc/Kconfig index 29f2e988c56a9..407c87d9879ae 100644 --- a/arch/sparc/Kconfig +++ b/arch/sparc/Kconfig @@ -78,6 +78,7 @@ config SPARC64 select HAVE_C_RECORDMCOUNT select NO_BOOTMEM select HAVE_ARCH_AUDITSYSCALL + select ARCH_SUPPORTS_ATOMIC_RMW config ARCH_DEFCONFIG string diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index a8f749ef0fdcc..d24887b645dc4 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -131,6 +131,7 @@ config X86 select HAVE_CC_STACKPROTECTOR select GENERIC_CPU_AUTOPROBE select HAVE_ARCH_AUDITSYSCALL + select ARCH_SUPPORTS_ATOMIC_RMW config INSTRUCTION_DECODER def_bool y diff --git a/kernel/Kconfig.locks b/kernel/Kconfig.locks index 35536d9c09642..81907941d9219 100644 --- a/kernel/Kconfig.locks +++ b/kernel/Kconfig.locks @@ -220,9 +220,12 @@ config INLINE_WRITE_UNLOCK_IRQRESTORE endif +config ARCH_SUPPORTS_ATOMIC_RMW + bool + config MUTEX_SPIN_ON_OWNER def_bool y - depends on SMP && !DEBUG_MUTEXES + depends on SMP && !DEBUG_MUTEXES && ARCH_SUPPORTS_ATOMIC_RMW config ARCH_USE_QUEUE_RWLOCK bool From 5db6c6fefb1ca0e81e3bd6dd8998bf51c453d823 Mon Sep 17 00:00:00 2001 From: Davidlohr Bueso Date: Fri, 11 Jul 2014 14:00:06 -0700 Subject: [PATCH 225/268] locking/rwsem: Add CONFIG_RWSEM_SPIN_ON_OWNER Just like with mutexes (CONFIG_MUTEX_SPIN_ON_OWNER), encapsulate the dependencies for rwsem optimistic spinning. No logical changes here as it continues to depend on both SMP and the XADD algorithm variant. Signed-off-by: Davidlohr Bueso Acked-by: Jason Low [ Also make it depend on ARCH_SUPPORTS_ATOMIC_RMW. ] Signed-off-by: Peter Zijlstra Link: http://lkml.kernel.org/r/1405112406-13052-2-git-send-email-davidlohr@hp.com Cc: aswin@hp.com Cc: Chris Mason Cc: Davidlohr Bueso Cc: Josef Bacik Cc: Linus Torvalds Cc: Waiman Long Signed-off-by: Ingo Molnar Signed-off-by: Ingo Molnar --- include/linux/rwsem.h | 6 ++++-- kernel/Kconfig.locks | 4 ++++ kernel/locking/rwsem-xadd.c | 4 ++-- kernel/locking/rwsem.c | 2 +- 4 files changed, 11 insertions(+), 5 deletions(-) diff --git a/include/linux/rwsem.h b/include/linux/rwsem.h index 716807f0eb2d6..035d3c57fc8a7 100644 --- a/include/linux/rwsem.h +++ b/include/linux/rwsem.h @@ -14,7 +14,9 @@ #include #include #include +#ifdef CONFIG_RWSEM_SPIN_ON_OWNER #include +#endif struct rw_semaphore; @@ -26,7 +28,7 @@ struct rw_semaphore { long count; struct list_head wait_list; raw_spinlock_t wait_lock; -#ifdef CONFIG_SMP +#ifdef CONFIG_RWSEM_SPIN_ON_OWNER struct optimistic_spin_queue osq; /* spinner MCS lock */ /* * Write owner. Used as a speculative check to see @@ -63,7 +65,7 @@ static inline int rwsem_is_locked(struct rw_semaphore *sem) # define __RWSEM_DEP_MAP_INIT(lockname) #endif -#if defined(CONFIG_SMP) && !defined(CONFIG_RWSEM_GENERIC_SPINLOCK) +#ifdef CONFIG_RWSEM_SPIN_ON_OWNER #define __RWSEM_OPT_INIT(lockname) , .osq = OSQ_LOCK_UNLOCKED, .owner = NULL #else #define __RWSEM_OPT_INIT(lockname) diff --git a/kernel/Kconfig.locks b/kernel/Kconfig.locks index 81907941d9219..76768ee812b27 100644 --- a/kernel/Kconfig.locks +++ b/kernel/Kconfig.locks @@ -227,6 +227,10 @@ config MUTEX_SPIN_ON_OWNER def_bool y depends on SMP && !DEBUG_MUTEXES && ARCH_SUPPORTS_ATOMIC_RMW +config RWSEM_SPIN_ON_OWNER + def_bool y + depends on SMP && RWSEM_XCHGADD_ALGORITHM && ARCH_SUPPORTS_ATOMIC_RMW + config ARCH_USE_QUEUE_RWLOCK bool diff --git a/kernel/locking/rwsem-xadd.c b/kernel/locking/rwsem-xadd.c index 7190592c26457..a2391ac135c8d 100644 --- a/kernel/locking/rwsem-xadd.c +++ b/kernel/locking/rwsem-xadd.c @@ -82,7 +82,7 @@ void __init_rwsem(struct rw_semaphore *sem, const char *name, sem->count = RWSEM_UNLOCKED_VALUE; raw_spin_lock_init(&sem->wait_lock); INIT_LIST_HEAD(&sem->wait_list); -#ifdef CONFIG_SMP +#ifdef CONFIG_RWSEM_SPIN_ON_OWNER sem->owner = NULL; osq_lock_init(&sem->osq); #endif @@ -262,7 +262,7 @@ static inline bool rwsem_try_write_lock(long count, struct rw_semaphore *sem) return false; } -#ifdef CONFIG_SMP +#ifdef CONFIG_RWSEM_SPIN_ON_OWNER /* * Try to acquire write lock before the writer has been put on wait queue. */ diff --git a/kernel/locking/rwsem.c b/kernel/locking/rwsem.c index 42f806de49d42..e2d3bc7f03b41 100644 --- a/kernel/locking/rwsem.c +++ b/kernel/locking/rwsem.c @@ -12,7 +12,7 @@ #include -#if defined(CONFIG_SMP) && defined(CONFIG_RWSEM_XCHGADD_ALGORITHM) +#ifdef CONFIG_RWSEM_SPIN_ON_OWNER static inline void rwsem_set_owner(struct rw_semaphore *sem) { sem->owner = current; From 2fa1adc07089020984d52bb55c5af0b9d86dff21 Mon Sep 17 00:00:00 2001 From: Quentin Armitage Date: Thu, 10 Jul 2014 11:15:55 +0100 Subject: [PATCH 226/268] cpufreq: kirkwood: Reinstate cpufreq driver for ARCH_KIRKWOOD Commit ff1f0018cf66080d8e6f59791e552615648a033a ("drivers: Enable building of Kirkwood drivers for mach-mvebu") added Kirkwood into mach-mvebu, adding MACH_KIRKWOOD to ARCH_KIRKWOOD in the KConfig files. The change for ARM_KIRKWOOD_CPUFREQ replaced ARCH_KIRKWOOD with MACH_KIRKWOOD, whereas all the other changes were ARCH_KIRKWOOD || MACH_KIRKWOOD. As a consequence of this change, the cpufreq driver is no longer enabled for ARCH_KIRKWOOD. This patch reinstates ARM_KIRKWOOD_CPUFREQ for ARCH_KIRKWOOD. Signed-off-by: Quentin Armitage Acked-by: Andrew Lunn Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/Kconfig.arm | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/cpufreq/Kconfig.arm b/drivers/cpufreq/Kconfig.arm index 8c5cf4bdeab8f..7364a538e0562 100644 --- a/drivers/cpufreq/Kconfig.arm +++ b/drivers/cpufreq/Kconfig.arm @@ -119,7 +119,7 @@ config ARM_INTEGRATOR If in doubt, say Y. config ARM_KIRKWOOD_CPUFREQ - def_bool MACH_KIRKWOOD + def_bool ARCH_KIRKWOOD || MACH_KIRKWOOD help This adds the CPUFreq driver for Marvell Kirkwood SoCs. From 1bf8cc3d017575a38d4361f56ccc0a670a16bcd9 Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Fri, 11 Jul 2014 20:24:19 +0530 Subject: [PATCH 227/268] cpufreq: cpu0: OPPs can be populated at runtime OPPs can be populated statically, via DT, or added at run time with dev_pm_opp_add(). While this driver handles the first case correctly, it would fail to populate OPPs added at runtime. Because call to of_init_opp_table() would fail as there are no OPPs in DT and probe will return early. To fix this, remove error checking and call dev_pm_opp_init_cpufreq_table() unconditionally. Update bindings as well. Suggested-by: Stephen Boyd Tested-by: Stephen Boyd Signed-off-by: Viresh Kumar Acked-by: Santosh Shilimkar Signed-off-by: Rafael J. Wysocki --- Documentation/devicetree/bindings/cpufreq/cpufreq-cpu0.txt | 6 ++++-- drivers/cpufreq/cpufreq-cpu0.c | 7 ++----- 2 files changed, 6 insertions(+), 7 deletions(-) diff --git a/Documentation/devicetree/bindings/cpufreq/cpufreq-cpu0.txt b/Documentation/devicetree/bindings/cpufreq/cpufreq-cpu0.txt index f055515d2b624..366690cb86a30 100644 --- a/Documentation/devicetree/bindings/cpufreq/cpufreq-cpu0.txt +++ b/Documentation/devicetree/bindings/cpufreq/cpufreq-cpu0.txt @@ -8,10 +8,12 @@ Both required and optional properties listed below must be defined under node /cpus/cpu@0. Required properties: -- operating-points: Refer to Documentation/devicetree/bindings/power/opp.txt - for details +- None Optional properties: +- operating-points: Refer to Documentation/devicetree/bindings/power/opp.txt for + details. OPPs *must* be supplied either via DT, i.e. this property, or + populated at runtime. - clock-latency: Specify the possible maximum transition latency for clock, in unit of nanoseconds. - voltage-tolerance: Specify the CPU voltage tolerance in percentage. diff --git a/drivers/cpufreq/cpufreq-cpu0.c b/drivers/cpufreq/cpufreq-cpu0.c index ee1ae303a07c4..86beda9f950b7 100644 --- a/drivers/cpufreq/cpufreq-cpu0.c +++ b/drivers/cpufreq/cpufreq-cpu0.c @@ -152,11 +152,8 @@ static int cpu0_cpufreq_probe(struct platform_device *pdev) goto out_put_reg; } - ret = of_init_opp_table(cpu_dev); - if (ret) { - pr_err("failed to init OPP table: %d\n", ret); - goto out_put_clk; - } + /* OPPs might be populated at runtime, don't check for error here */ + of_init_opp_table(cpu_dev); ret = dev_pm_opp_init_cpufreq_table(cpu_dev, &freq_table); if (ret) { From fbb60fe35ad579b511de8604b06a30b43846473b Mon Sep 17 00:00:00 2001 From: Jason Wang Date: Mon, 12 May 2014 16:35:39 +0800 Subject: [PATCH 228/268] drm/qxl: return IRQ_NONE if it was not our irq Return IRQ_NONE if it was not our irq. This is necessary for the case when qxl is sharing irq line with a device A in a crash kernel. If qxl is initialized before A and A's irq was raised during this gap, returning IRQ_HANDLED in this case will cause this irq to be raised again after EOI since kernel think it was handled but in fact it was not. Cc: Gerd Hoffmann Cc: stable@vger.kernel.org Signed-off-by: Jason Wang Signed-off-by: Dave Airlie --- drivers/gpu/drm/qxl/qxl_irq.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/gpu/drm/qxl/qxl_irq.c b/drivers/gpu/drm/qxl/qxl_irq.c index 34d6a85e90236..0bf1e20c6e44c 100644 --- a/drivers/gpu/drm/qxl/qxl_irq.c +++ b/drivers/gpu/drm/qxl/qxl_irq.c @@ -33,6 +33,9 @@ irqreturn_t qxl_irq_handler(int irq, void *arg) pending = xchg(&qdev->ram_header->int_pending, 0); + if (!pending) + return IRQ_NONE; + atomic_inc(&qdev->irq_received); if (pending & QXL_INTERRUPT_DISPLAY) { From de12d6f4b10b21854441f5242dcb29ea96181e58 Mon Sep 17 00:00:00 2001 From: Guenter Roeck Date: Wed, 16 Jul 2014 17:40:31 -0700 Subject: [PATCH 229/268] hwmon: (adt7470) Fix writes to temperature limit registers Temperature limit registers are signed. Limits therefore need to be clamped to (-128, 127) degrees C and not to (0, 255) degrees C. Without this fix, writing a limit of 128 degrees C sets the actual limit to -128 degrees C. Signed-off-by: Guenter Roeck Cc: stable@vger.kernel.org Reviewed-by: Axel Lin --- drivers/hwmon/adt7470.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/hwmon/adt7470.c b/drivers/hwmon/adt7470.c index 0f4dea5ccf171..9ee3913850d68 100644 --- a/drivers/hwmon/adt7470.c +++ b/drivers/hwmon/adt7470.c @@ -515,7 +515,7 @@ static ssize_t set_temp_min(struct device *dev, return -EINVAL; temp = DIV_ROUND_CLOSEST(temp, 1000); - temp = clamp_val(temp, 0, 255); + temp = clamp_val(temp, -128, 127); mutex_lock(&data->lock); data->temp_min[attr->index] = temp; @@ -549,7 +549,7 @@ static ssize_t set_temp_max(struct device *dev, return -EINVAL; temp = DIV_ROUND_CLOSEST(temp, 1000); - temp = clamp_val(temp, 0, 255); + temp = clamp_val(temp, -128, 127); mutex_lock(&data->lock); data->temp_max[attr->index] = temp; @@ -826,7 +826,7 @@ static ssize_t set_pwm_tmin(struct device *dev, return -EINVAL; temp = DIV_ROUND_CLOSEST(temp, 1000); - temp = clamp_val(temp, 0, 255); + temp = clamp_val(temp, -128, 127); mutex_lock(&data->lock); data->pwm_tmin[attr->index] = temp; From 652c1a05171695d21b84dd3a723606b50eeb80fd Mon Sep 17 00:00:00 2001 From: Or Gerlitz Date: Wed, 25 Jun 2014 16:44:14 +0300 Subject: [PATCH 230/268] IB/mlx5: Enable "block multicast loopback" for kernel consumers In commit f360d88a2efd, we advertise blocking multicast loopback to both kernel and userspace consumers, but don't allow kernel consumers (e.g IPoIB) to use it with their UD QPs. Fix that. Fixes: f360d88a2efd ("IB/mlx5: Add block multicast loopback support") Reported-by: Haggai Eran Signed-off-by: Eli Cohen Signed-off-by: Or Gerlitz Signed-off-by: Roland Dreier --- drivers/infiniband/hw/mlx5/qp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c index d13ddf1c00333..bbbcf389272cb 100644 --- a/drivers/infiniband/hw/mlx5/qp.c +++ b/drivers/infiniband/hw/mlx5/qp.c @@ -675,7 +675,7 @@ static int create_kernel_qp(struct mlx5_ib_dev *dev, int err; uuari = &dev->mdev.priv.uuari; - if (init_attr->create_flags & ~IB_QP_CREATE_SIGNATURE_EN) + if (init_attr->create_flags & ~(IB_QP_CREATE_SIGNATURE_EN | IB_QP_CREATE_BLOCK_MULTICAST_LOOPBACK)) return -EINVAL; if (init_attr->qp_type == MLX5_IB_QPT_REG_UMR) From 92c14bd9477a20a83144f08c0ca25b0308bf0730 Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Thu, 17 Jul 2014 10:48:25 +0530 Subject: [PATCH 231/268] cpufreq: move policy kobj to policy->cpu at resume This is only relevant to implementations with multiple clusters, where clusters have separate clock lines but all CPUs within a cluster share it. Consider a dual cluster platform with 2 cores per cluster. During suspend we start hot unplugging CPUs in order 1 to 3. When CPU2 is removed, policy->kobj would be moved to CPU3 and when CPU3 goes down we wouldn't free policy or its kobj as we want to retain permissions/values/etc. Now on resume, we will get CPU2 before CPU3 and will call __cpufreq_add_dev(). We will recover the old policy and update policy->cpu from 3 to 2 from update_policy_cpu(). But the kobj is still tied to CPU3 and isn't moved to CPU2. We wouldn't create a link for CPU2, but would try that for CPU3 while bringing it online. Which will report errors as CPU3 already has kobj assigned to it. This bug got introduced with commit 42f921a, which overlooked this scenario. To fix this, lets move kobj to the new policy->cpu while bringing first CPU of a cluster back. Also do a WARN_ON() if kobject_move failed, as we would reach here only for the first CPU of a non-boot cluster. And we can't recover from this situation, if kobject_move() fails. Fixes: 42f921a6f10c (cpufreq: remove sysfs files for CPUs which failed to come back after resume) Cc: 3.13+ # 3.13+ Reported-and-tested-by: Bu Yitian Reported-by: Saravana Kannan Reviewed-by: Srivatsa S. Bhat Signed-off-by: Viresh Kumar Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/cpufreq.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c index 62259d27f03e6..6f024852c6fbd 100644 --- a/drivers/cpufreq/cpufreq.c +++ b/drivers/cpufreq/cpufreq.c @@ -1153,10 +1153,12 @@ static int __cpufreq_add_dev(struct device *dev, struct subsys_interface *sif) * the creation of a brand new one. So we need to perform this update * by invoking update_policy_cpu(). */ - if (recover_policy && cpu != policy->cpu) + if (recover_policy && cpu != policy->cpu) { update_policy_cpu(policy, cpu); - else + WARN_ON(kobject_move(&policy->kobj, &dev->kobj)); + } else { policy->cpu = cpu; + } cpumask_copy(policy->cpus, cpumask_of(cpu)); From a2a9e02b5b67a7a32a14ab6c4c331a1a0c23a1db Mon Sep 17 00:00:00 2001 From: Andrey Utkin Date: Thu, 17 Jul 2014 15:13:23 +0300 Subject: [PATCH 232/268] drivers/ata/pata_ep93xx.c: use signed int type for result of platform_get_irq() [linux-3.16-rc5/drivers/ata/pata_ep93xx.c:929]: (style) Checking if unsigned variable 'irq' is less than zero. Source code is irq = platform_get_irq(pdev, 0); if (irq < 0) { but unsigned int irq; $ fgrep platform_get_irq `find . -name \*.h -print` ./include/linux/platform_device.h:extern int platform_get_irq(struct platform_device *, unsigned int); Now using "int" type instead of "unsigned int" for "irq" variable. Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=80401 Reported-by: David Binderman Signed-off-by: Andrey Utkin Signed-off-by: Tejun Heo --- drivers/ata/pata_ep93xx.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/ata/pata_ep93xx.c b/drivers/ata/pata_ep93xx.c index 6ad5c072ce348..4d37c5415fc7f 100644 --- a/drivers/ata/pata_ep93xx.c +++ b/drivers/ata/pata_ep93xx.c @@ -915,7 +915,7 @@ static int ep93xx_pata_probe(struct platform_device *pdev) struct ep93xx_pata_data *drv_data; struct ata_host *host; struct ata_port *ap; - unsigned int irq; + int irq; struct resource *mem_res; void __iomem *ide_base; int err; From 144cb08864ed44be52d8634ac69cd98e5efcf527 Mon Sep 17 00:00:00 2001 From: Suravee Suthikulpanit Date: Tue, 15 Jul 2014 00:03:03 +0200 Subject: [PATCH 233/268] irqchip: gic: Add binding probe for ARM GIC400 Commit 3ab72f9156bb "dt-bindings: add GIC-400 binding" added the "arm,gic-400" compatible string, but the corresponding IRQCHIP_DECLARE was never added to the gic driver. Therefore add the missing irqchip declaration for it. Signed-off-by: Suravee Suthikulpanit Removed additional empty line and adapted commit message to mark it as fixing an issue. Signed-off-by: Heiko Stuebner Acked-by: Will Deacon Fixes: 3ab72f9156bb ("dt-bindings: add GIC-400 binding") Cc: # v3.14+ Link: https://lkml.kernel.org/r/2621565.f5eISveXXJ@diego Signed-off-by: Jason Cooper --- drivers/irqchip/irq-gic.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/irqchip/irq-gic.c b/drivers/irqchip/irq-gic.c index aadd6f5723cb6..66ed8922dab22 100644 --- a/drivers/irqchip/irq-gic.c +++ b/drivers/irqchip/irq-gic.c @@ -1071,6 +1071,7 @@ gic_of_init(struct device_node *node, struct device_node *parent) gic_cnt++; return 0; } +IRQCHIP_DECLARE(gic_400, "arm,gic-400", gic_of_init); IRQCHIP_DECLARE(cortex_a15_gic, "arm,cortex-a15-gic", gic_of_init); IRQCHIP_DECLARE(cortex_a9_gic, "arm,cortex-a9-gic", gic_of_init); IRQCHIP_DECLARE(cortex_a7_gic, "arm,cortex-a7-gic", gic_of_init); From 0ac66effe7fcdee55bda6d5d10d3372c95a41920 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Mon, 14 Jul 2014 17:57:19 -0400 Subject: [PATCH 234/268] drm/radeon: avoid leaking edid data In some cases we fetch the edid in the detect() callback in order to determine what sort of monitor is connected. If that happens, don't fetch the edid again in the get_modes() callback or we will leak the edid. Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org --- drivers/gpu/drm/radeon/radeon_display.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/gpu/drm/radeon/radeon_display.c b/drivers/gpu/drm/radeon/radeon_display.c index 13896edcf0b65..65501af453beb 100644 --- a/drivers/gpu/drm/radeon/radeon_display.c +++ b/drivers/gpu/drm/radeon/radeon_display.c @@ -830,6 +830,10 @@ int radeon_ddc_get_modes(struct radeon_connector *radeon_connector) struct radeon_device *rdev = dev->dev_private; int ret = 0; + /* don't leak the edid if we already fetched it in detect() */ + if (radeon_connector->edid) + goto got_edid; + /* on hw with routers, select right port */ if (radeon_connector->router.ddc_valid) radeon_router_select_ddc_port(radeon_connector); @@ -868,6 +872,7 @@ int radeon_ddc_get_modes(struct radeon_connector *radeon_connector) radeon_connector->edid = radeon_bios_get_hardcoded_edid(rdev); } if (radeon_connector->edid) { +got_edid: drm_mode_connector_update_edid_property(&radeon_connector->base, radeon_connector->edid); ret = drm_add_edid_modes(&radeon_connector->base, radeon_connector->edid); drm_edid_to_eld(&radeon_connector->base, radeon_connector->edid); From 201bb62402e0227375c655446ea04fcd0acf7287 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Tue, 15 Jul 2014 09:48:53 -0400 Subject: [PATCH 235/268] drm/radeon: set default bl level to something reasonable If the value in the scratch register is 0, set it to the max level. This fixes an issue where the console fb blanking code calls back into the backlight driver on unblank and then sets the backlight level to 0 after the driver has already set the mode and enabled the backlight. bugs: https://bugs.freedesktop.org/show_bug.cgi?id=81382 https://bugs.freedesktop.org/show_bug.cgi?id=70207 Signed-off-by: Alex Deucher Tested-by: David Heidelberger Cc: stable@vger.kernel.org --- drivers/gpu/drm/radeon/atombios_encoders.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/radeon/atombios_encoders.c b/drivers/gpu/drm/radeon/atombios_encoders.c index 2b2908440644e..7d68203a3737f 100644 --- a/drivers/gpu/drm/radeon/atombios_encoders.c +++ b/drivers/gpu/drm/radeon/atombios_encoders.c @@ -183,7 +183,6 @@ void radeon_atom_backlight_init(struct radeon_encoder *radeon_encoder, struct backlight_properties props; struct radeon_backlight_privdata *pdata; struct radeon_encoder_atom_dig *dig; - u8 backlight_level; char bl_name[16]; /* Mac laptops with multiple GPUs use the gmux driver for backlight @@ -222,12 +221,17 @@ void radeon_atom_backlight_init(struct radeon_encoder *radeon_encoder, pdata->encoder = radeon_encoder; - backlight_level = radeon_atom_get_backlight_level_from_reg(rdev); - dig = radeon_encoder->enc_priv; dig->bl_dev = bd; bd->props.brightness = radeon_atom_backlight_get_brightness(bd); + /* Set a reasonable default here if the level is 0 otherwise + * fbdev will attempt to turn the backlight on after console + * unblanking and it will try and restore 0 which turns the backlight + * off again. + */ + if (bd->props.brightness == 0) + bd->props.brightness = RADEON_MAX_BL_LEVEL; bd->props.power = FB_BLANK_UNBLANK; backlight_update_status(bd); From f53f81b2576a9bd3af947e2b1c3a46dfab51c5ef Mon Sep 17 00:00:00 2001 From: Mario Kleiner Date: Thu, 3 Jul 2014 03:45:02 +0200 Subject: [PATCH 236/268] drm/radeon: Prevent too early kms-pageflips triggered by vblank. Since 3.16-rc1 we have this new failure: When the userspace XOrg ddx schedules vblank events to trigger deferred kms-pageflips, e.g., via the OML_sync_control extension call glXSwapBuffersMscOML(), or if a glXSwapBuffers() is called immediately after completion of a previous swapbuffers call, e.g., in a tight rendering loop with minimal rendering, it happens frequently that the pageflip ioctl() is executed within the same vblank in which a previous kms-pageflip completed, or - for deferred swaps - always one vblank earlier than requested by the client app. This causes premature pageflips and detection of failure by the ddx, e.g., XOrg log warnings like... "(WW) RADEON(1): radeon_dri2_flip_event_handler: Pageflip completion event has impossible msc 201025 < target_msc 201026" ... and error/invalid return values of glXWaitForSbcOML() and Intel_swap_events extension. Reason is the new way in which kms-pageflips are programmed since 3.16. This commit changes the time window in which the hw can execute pending programmed pageflips. Before, a pending flip would get executed anywhere within the vblank interval. Now a pending flip only gets executed at the leading edge of vblank (start of front porch), making sure that a invocation of the pageflip ioctl() within a given vblank interval will only lead to pageflip completion in the following vblank. Tested to death on a DCE-4 card. Signed-off-by: Mario Kleiner Signed-off-by: Alex Deucher --- drivers/gpu/drm/radeon/atombios_crtc.c | 8 ++++---- drivers/gpu/drm/radeon/evergreen.c | 5 +++-- drivers/gpu/drm/radeon/evergreen_reg.h | 1 - drivers/gpu/drm/radeon/rv515.c | 5 +++-- 4 files changed, 10 insertions(+), 9 deletions(-) diff --git a/drivers/gpu/drm/radeon/atombios_crtc.c b/drivers/gpu/drm/radeon/atombios_crtc.c index a03c73411a56a..30d242b25078e 100644 --- a/drivers/gpu/drm/radeon/atombios_crtc.c +++ b/drivers/gpu/drm/radeon/atombios_crtc.c @@ -1414,8 +1414,8 @@ static int dce4_crtc_do_set_base(struct drm_crtc *crtc, tmp &= ~EVERGREEN_GRPH_SURFACE_UPDATE_H_RETRACE_EN; WREG32(EVERGREEN_GRPH_FLIP_CONTROL + radeon_crtc->crtc_offset, tmp); - /* set pageflip to happen anywhere in vblank interval */ - WREG32(EVERGREEN_MASTER_UPDATE_MODE + radeon_crtc->crtc_offset, 0); + /* set pageflip to happen only at start of vblank interval (front porch) */ + WREG32(EVERGREEN_MASTER_UPDATE_MODE + radeon_crtc->crtc_offset, 3); if (!atomic && fb && fb != crtc->primary->fb) { radeon_fb = to_radeon_framebuffer(fb); @@ -1614,8 +1614,8 @@ static int avivo_crtc_do_set_base(struct drm_crtc *crtc, tmp &= ~AVIVO_D1GRPH_SURFACE_UPDATE_H_RETRACE_EN; WREG32(AVIVO_D1GRPH_FLIP_CONTROL + radeon_crtc->crtc_offset, tmp); - /* set pageflip to happen anywhere in vblank interval */ - WREG32(AVIVO_D1MODE_MASTER_UPDATE_MODE + radeon_crtc->crtc_offset, 0); + /* set pageflip to happen only at start of vblank interval (front porch) */ + WREG32(AVIVO_D1MODE_MASTER_UPDATE_MODE + radeon_crtc->crtc_offset, 3); if (!atomic && fb && fb != crtc->primary->fb) { radeon_fb = to_radeon_framebuffer(fb); diff --git a/drivers/gpu/drm/radeon/evergreen.c b/drivers/gpu/drm/radeon/evergreen.c index f7ece0ff431b3..250bac3935a43 100644 --- a/drivers/gpu/drm/radeon/evergreen.c +++ b/drivers/gpu/drm/radeon/evergreen.c @@ -2642,8 +2642,9 @@ void evergreen_mc_resume(struct radeon_device *rdev, struct evergreen_mc_save *s for (i = 0; i < rdev->num_crtc; i++) { if (save->crtc_enabled[i]) { tmp = RREG32(EVERGREEN_MASTER_UPDATE_MODE + crtc_offsets[i]); - if ((tmp & 0x3) != 0) { - tmp &= ~0x3; + if ((tmp & 0x7) != 3) { + tmp &= ~0x7; + tmp |= 0x3; WREG32(EVERGREEN_MASTER_UPDATE_MODE + crtc_offsets[i], tmp); } tmp = RREG32(EVERGREEN_GRPH_UPDATE + crtc_offsets[i]); diff --git a/drivers/gpu/drm/radeon/evergreen_reg.h b/drivers/gpu/drm/radeon/evergreen_reg.h index 333d143fca2cc..23bff590fb6e8 100644 --- a/drivers/gpu/drm/radeon/evergreen_reg.h +++ b/drivers/gpu/drm/radeon/evergreen_reg.h @@ -239,7 +239,6 @@ # define EVERGREEN_CRTC_V_BLANK (1 << 0) #define EVERGREEN_CRTC_STATUS_POSITION 0x6e90 #define EVERGREEN_CRTC_STATUS_HV_COUNT 0x6ea0 -#define EVERGREEN_MASTER_UPDATE_MODE 0x6ef8 #define EVERGREEN_CRTC_UPDATE_LOCK 0x6ed4 #define EVERGREEN_MASTER_UPDATE_LOCK 0x6ef4 #define EVERGREEN_MASTER_UPDATE_MODE 0x6ef8 diff --git a/drivers/gpu/drm/radeon/rv515.c b/drivers/gpu/drm/radeon/rv515.c index 237dd29d9f1c8..3e21e869015fe 100644 --- a/drivers/gpu/drm/radeon/rv515.c +++ b/drivers/gpu/drm/radeon/rv515.c @@ -406,8 +406,9 @@ void rv515_mc_resume(struct radeon_device *rdev, struct rv515_mc_save *save) for (i = 0; i < rdev->num_crtc; i++) { if (save->crtc_enabled[i]) { tmp = RREG32(AVIVO_D1MODE_MASTER_UPDATE_MODE + crtc_offsets[i]); - if ((tmp & 0x3) != 0) { - tmp &= ~0x3; + if ((tmp & 0x7) != 3) { + tmp &= ~0x7; + tmp |= 0x3; WREG32(AVIVO_D1MODE_MASTER_UPDATE_MODE + crtc_offsets[i], tmp); } tmp = RREG32(AVIVO_D1GRPH_UPDATE + crtc_offsets[i]); From c60381bd82a54233bb46f93be00a4154bd0cf95d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Michel=20D=C3=A4nzer?= Date: Mon, 14 Jul 2014 15:48:42 +0900 Subject: [PATCH 237/268] drm/radeon: Move pinning the BO back to radeon_crtc_page_flip() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit As well as enabling the vblank interrupt. These shouldn't take any significant amount of time, but at least pinning the BO has actually been seen to fail in practice before, in which case we need to let userspace know about it. Reviewed-by: Christian König Signed-off-by: Michel Dänzer Signed-off-by: Alex Deucher --- drivers/gpu/drm/radeon/radeon.h | 3 +- drivers/gpu/drm/radeon/radeon_display.c | 181 ++++++++++++------------ 2 files changed, 93 insertions(+), 91 deletions(-) diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h index 29d9cc04c04ec..b7204500a9a69 100644 --- a/drivers/gpu/drm/radeon/radeon.h +++ b/drivers/gpu/drm/radeon/radeon.h @@ -684,10 +684,9 @@ struct radeon_flip_work { struct work_struct unpin_work; struct radeon_device *rdev; int crtc_id; - struct drm_framebuffer *fb; + uint64_t base; struct drm_pending_vblank_event *event; struct radeon_bo *old_rbo; - struct radeon_bo *new_rbo; struct radeon_fence *fence; }; diff --git a/drivers/gpu/drm/radeon/radeon_display.c b/drivers/gpu/drm/radeon/radeon_display.c index 65501af453beb..8de5794736455 100644 --- a/drivers/gpu/drm/radeon/radeon_display.c +++ b/drivers/gpu/drm/radeon/radeon_display.c @@ -386,11 +386,6 @@ static void radeon_flip_work_func(struct work_struct *__work) struct radeon_crtc *radeon_crtc = rdev->mode_info.crtcs[work->crtc_id]; struct drm_crtc *crtc = &radeon_crtc->base; - struct drm_framebuffer *fb = work->fb; - - uint32_t tiling_flags, pitch_pixels; - uint64_t base; - unsigned long flags; int r; @@ -411,26 +406,94 @@ static void radeon_flip_work_func(struct work_struct *__work) radeon_fence_unref(&work->fence); } + /* do the flip (mmio) */ + radeon_page_flip(rdev, radeon_crtc->crtc_id, work->base); + + /* We borrow the event spin lock for protecting flip_status */ + spin_lock_irqsave(&crtc->dev->event_lock, flags); + + /* set the proper interrupt */ + radeon_irq_kms_pflip_irq_get(rdev, radeon_crtc->crtc_id); + + radeon_crtc->flip_status = RADEON_FLIP_SUBMITTED; + spin_unlock_irqrestore(&crtc->dev->event_lock, flags); + up_read(&rdev->exclusive_lock); + + return; + +cleanup: + drm_gem_object_unreference_unlocked(&work->old_rbo->gem_base); + radeon_fence_unref(&work->fence); + kfree(work); + up_read(&rdev->exclusive_lock); +} + +static int radeon_crtc_page_flip(struct drm_crtc *crtc, + struct drm_framebuffer *fb, + struct drm_pending_vblank_event *event, + uint32_t page_flip_flags) +{ + struct drm_device *dev = crtc->dev; + struct radeon_device *rdev = dev->dev_private; + struct radeon_crtc *radeon_crtc = to_radeon_crtc(crtc); + struct radeon_framebuffer *old_radeon_fb; + struct radeon_framebuffer *new_radeon_fb; + struct drm_gem_object *obj; + struct radeon_flip_work *work; + struct radeon_bo *new_rbo; + uint32_t tiling_flags, pitch_pixels; + uint64_t base; + unsigned long flags; + int r; + + work = kzalloc(sizeof *work, GFP_KERNEL); + if (work == NULL) + return -ENOMEM; + + INIT_WORK(&work->flip_work, radeon_flip_work_func); + INIT_WORK(&work->unpin_work, radeon_unpin_work_func); + + work->rdev = rdev; + work->crtc_id = radeon_crtc->crtc_id; + work->event = event; + + /* schedule unpin of the old buffer */ + old_radeon_fb = to_radeon_framebuffer(crtc->primary->fb); + obj = old_radeon_fb->obj; + + /* take a reference to the old object */ + drm_gem_object_reference(obj); + work->old_rbo = gem_to_radeon_bo(obj); + + new_radeon_fb = to_radeon_framebuffer(fb); + obj = new_radeon_fb->obj; + new_rbo = gem_to_radeon_bo(obj); + + spin_lock(&new_rbo->tbo.bdev->fence_lock); + if (new_rbo->tbo.sync_obj) + work->fence = radeon_fence_ref(new_rbo->tbo.sync_obj); + spin_unlock(&new_rbo->tbo.bdev->fence_lock); + /* pin the new buffer */ - DRM_DEBUG_DRIVER("flip-ioctl() cur_fbo = %p, cur_bbo = %p\n", - work->old_rbo, work->new_rbo); + DRM_DEBUG_DRIVER("flip-ioctl() cur_rbo = %p, new_rbo = %p\n", + work->old_rbo, new_rbo); - r = radeon_bo_reserve(work->new_rbo, false); + r = radeon_bo_reserve(new_rbo, false); if (unlikely(r != 0)) { DRM_ERROR("failed to reserve new rbo buffer before flip\n"); goto cleanup; } /* Only 27 bit offset for legacy CRTC */ - r = radeon_bo_pin_restricted(work->new_rbo, RADEON_GEM_DOMAIN_VRAM, + r = radeon_bo_pin_restricted(new_rbo, RADEON_GEM_DOMAIN_VRAM, ASIC_IS_AVIVO(rdev) ? 0 : 1 << 27, &base); if (unlikely(r != 0)) { - radeon_bo_unreserve(work->new_rbo); + radeon_bo_unreserve(new_rbo); r = -EINVAL; DRM_ERROR("failed to pin new rbo buffer before flip\n"); goto cleanup; } - radeon_bo_get_tiling_flags(work->new_rbo, &tiling_flags, NULL); - radeon_bo_unreserve(work->new_rbo); + radeon_bo_get_tiling_flags(new_rbo, &tiling_flags, NULL); + radeon_bo_unreserve(new_rbo); if (!ASIC_IS_AVIVO(rdev)) { /* crtc offset is from display base addr not FB location */ @@ -467,6 +530,7 @@ static void radeon_flip_work_func(struct work_struct *__work) } base &= ~7; } + work->base = base; r = drm_vblank_get(crtc->dev, radeon_crtc->crtc_id); if (r) { @@ -477,100 +541,39 @@ static void radeon_flip_work_func(struct work_struct *__work) /* We borrow the event spin lock for protecting flip_work */ spin_lock_irqsave(&crtc->dev->event_lock, flags); - /* set the proper interrupt */ - radeon_irq_kms_pflip_irq_get(rdev, radeon_crtc->crtc_id); + if (radeon_crtc->flip_status != RADEON_FLIP_NONE) { + DRM_DEBUG_DRIVER("flip queue: crtc already busy\n"); + spin_unlock_irqrestore(&crtc->dev->event_lock, flags); + r = -EBUSY; + goto pflip_cleanup; + } + radeon_crtc->flip_status = RADEON_FLIP_PENDING; + radeon_crtc->flip_work = work; - /* do the flip (mmio) */ - radeon_page_flip(rdev, radeon_crtc->crtc_id, base); + /* update crtc fb */ + crtc->primary->fb = fb; - radeon_crtc->flip_status = RADEON_FLIP_SUBMITTED; spin_unlock_irqrestore(&crtc->dev->event_lock, flags); - up_read(&rdev->exclusive_lock); - return; + queue_work(radeon_crtc->flip_queue, &work->flip_work); + return 0; pflip_cleanup: - if (unlikely(radeon_bo_reserve(work->new_rbo, false) != 0)) { + if (unlikely(radeon_bo_reserve(new_rbo, false) != 0)) { DRM_ERROR("failed to reserve new rbo in error path\n"); goto cleanup; } - if (unlikely(radeon_bo_unpin(work->new_rbo) != 0)) { + if (unlikely(radeon_bo_unpin(new_rbo) != 0)) { DRM_ERROR("failed to unpin new rbo in error path\n"); } - radeon_bo_unreserve(work->new_rbo); + radeon_bo_unreserve(new_rbo); cleanup: drm_gem_object_unreference_unlocked(&work->old_rbo->gem_base); radeon_fence_unref(&work->fence); kfree(work); - up_read(&rdev->exclusive_lock); -} - -static int radeon_crtc_page_flip(struct drm_crtc *crtc, - struct drm_framebuffer *fb, - struct drm_pending_vblank_event *event, - uint32_t page_flip_flags) -{ - struct drm_device *dev = crtc->dev; - struct radeon_device *rdev = dev->dev_private; - struct radeon_crtc *radeon_crtc = to_radeon_crtc(crtc); - struct radeon_framebuffer *old_radeon_fb; - struct radeon_framebuffer *new_radeon_fb; - struct drm_gem_object *obj; - struct radeon_flip_work *work; - unsigned long flags; - - work = kzalloc(sizeof *work, GFP_KERNEL); - if (work == NULL) - return -ENOMEM; - - INIT_WORK(&work->flip_work, radeon_flip_work_func); - INIT_WORK(&work->unpin_work, radeon_unpin_work_func); - work->rdev = rdev; - work->crtc_id = radeon_crtc->crtc_id; - work->fb = fb; - work->event = event; - - /* schedule unpin of the old buffer */ - old_radeon_fb = to_radeon_framebuffer(crtc->primary->fb); - obj = old_radeon_fb->obj; - - /* take a reference to the old object */ - drm_gem_object_reference(obj); - work->old_rbo = gem_to_radeon_bo(obj); - - new_radeon_fb = to_radeon_framebuffer(fb); - obj = new_radeon_fb->obj; - work->new_rbo = gem_to_radeon_bo(obj); - - spin_lock(&work->new_rbo->tbo.bdev->fence_lock); - if (work->new_rbo->tbo.sync_obj) - work->fence = radeon_fence_ref(work->new_rbo->tbo.sync_obj); - spin_unlock(&work->new_rbo->tbo.bdev->fence_lock); - - /* We borrow the event spin lock for protecting flip_work */ - spin_lock_irqsave(&crtc->dev->event_lock, flags); - - if (radeon_crtc->flip_status != RADEON_FLIP_NONE) { - DRM_DEBUG_DRIVER("flip queue: crtc already busy\n"); - spin_unlock_irqrestore(&crtc->dev->event_lock, flags); - drm_gem_object_unreference_unlocked(&work->old_rbo->gem_base); - radeon_fence_unref(&work->fence); - kfree(work); - return -EBUSY; - } - radeon_crtc->flip_status = RADEON_FLIP_PENDING; - radeon_crtc->flip_work = work; - - /* update crtc fb */ - crtc->primary->fb = fb; - - spin_unlock_irqrestore(&crtc->dev->event_lock, flags); - - queue_work(radeon_crtc->flip_queue, &work->flip_work); - - return 0; + return r; } static int From 306f98d9a1079cc78567b1a6a5e94c272c163e47 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Michel=20D=C3=A4nzer?= Date: Mon, 14 Jul 2014 15:58:03 +0900 Subject: [PATCH 238/268] drm/radeon: Complete page flip even if waiting on the BO fence fails MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Otherwise the DRM core and userspace will be confused about which BO the CRTC is scanning out. Reviewed-by: Christian König Signed-off-by: Michel Dänzer Signed-off-by: Alex Deucher --- drivers/gpu/drm/radeon/radeon_display.c | 24 +++++++++--------------- 1 file changed, 9 insertions(+), 15 deletions(-) diff --git a/drivers/gpu/drm/radeon/radeon_display.c b/drivers/gpu/drm/radeon/radeon_display.c index 8de5794736455..97ea46597bead 100644 --- a/drivers/gpu/drm/radeon/radeon_display.c +++ b/drivers/gpu/drm/radeon/radeon_display.c @@ -390,20 +390,22 @@ static void radeon_flip_work_func(struct work_struct *__work) int r; down_read(&rdev->exclusive_lock); - while (work->fence) { + if (work->fence) { r = radeon_fence_wait(work->fence, false); if (r == -EDEADLK) { up_read(&rdev->exclusive_lock); r = radeon_gpu_reset(rdev); down_read(&rdev->exclusive_lock); } + if (r) + DRM_ERROR("failed to wait on page flip fence (%d)!\n", r); - if (r) { - DRM_ERROR("failed to wait on page flip fence (%d)!\n", - r); - goto cleanup; - } else - radeon_fence_unref(&work->fence); + /* We continue with the page flip even if we failed to wait on + * the fence, otherwise the DRM core and userspace will be + * confused about which BO the CRTC is scanning out + */ + + radeon_fence_unref(&work->fence); } /* do the flip (mmio) */ @@ -418,14 +420,6 @@ static void radeon_flip_work_func(struct work_struct *__work) radeon_crtc->flip_status = RADEON_FLIP_SUBMITTED; spin_unlock_irqrestore(&crtc->dev->event_lock, flags); up_read(&rdev->exclusive_lock); - - return; - -cleanup: - drm_gem_object_unreference_unlocked(&work->old_rbo->gem_base); - radeon_fence_unref(&work->fence); - kfree(work); - up_read(&rdev->exclusive_lock); } static int radeon_crtc_page_flip(struct drm_crtc *crtc, From c89e5be621e84b7b83650d87a956c52c5654c35b Mon Sep 17 00:00:00 2001 From: Mario Kleiner Date: Thu, 17 Jul 2014 01:27:25 +0200 Subject: [PATCH 239/268] drm/radeon: Remove redundant fence unref in pageflip path. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Not needed anymore, as it is already unreffed within radeon_flip_work_func() after its only use. Signed-off-by: Mario Kleiner Reviewed-by: Michel Dänzer Signed-off-by: Alex Deucher --- drivers/gpu/drm/radeon/radeon_display.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/gpu/drm/radeon/radeon_display.c b/drivers/gpu/drm/radeon/radeon_display.c index 97ea46597bead..bfa3a6cc170fa 100644 --- a/drivers/gpu/drm/radeon/radeon_display.c +++ b/drivers/gpu/drm/radeon/radeon_display.c @@ -366,7 +366,6 @@ void radeon_crtc_handle_flip(struct radeon_device *rdev, int crtc_id) spin_unlock_irqrestore(&rdev->ddev->event_lock, flags); drm_vblank_put(rdev->ddev, radeon_crtc->crtc_id); - radeon_fence_unref(&work->fence); radeon_irq_kms_pflip_irq_put(rdev, work->crtc_id); queue_work(radeon_crtc->flip_queue, &work->unpin_work); } From 826484977c29b42c8cb8c42bd41acaa6e152a4bb Mon Sep 17 00:00:00 2001 From: Mario Kleiner Date: Thu, 17 Jul 2014 01:37:53 +0200 Subject: [PATCH 240/268] drm/radeon: Add missing vblank_put in pageflip ioctl error path. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Mario Kleiner Reviewed-by: Michel Dänzer Signed-off-by: Alex Deucher --- drivers/gpu/drm/radeon/radeon_display.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/radeon/radeon_display.c b/drivers/gpu/drm/radeon/radeon_display.c index bfa3a6cc170fa..0a22a1bb688c5 100644 --- a/drivers/gpu/drm/radeon/radeon_display.c +++ b/drivers/gpu/drm/radeon/radeon_display.c @@ -538,7 +538,7 @@ static int radeon_crtc_page_flip(struct drm_crtc *crtc, DRM_DEBUG_DRIVER("flip queue: crtc already busy\n"); spin_unlock_irqrestore(&crtc->dev->event_lock, flags); r = -EBUSY; - goto pflip_cleanup; + goto vblank_cleanup; } radeon_crtc->flip_status = RADEON_FLIP_PENDING; radeon_crtc->flip_work = work; @@ -551,6 +551,9 @@ static int radeon_crtc_page_flip(struct drm_crtc *crtc, queue_work(radeon_crtc->flip_queue, &work->flip_work); return 0; +vblank_cleanup: + drm_vblank_put(crtc->dev, radeon_crtc->crtc_id); + pflip_cleanup: if (unlikely(radeon_bo_reserve(new_rbo, false) != 0)) { DRM_ERROR("failed to reserve new rbo in error path\n"); From 5f87e090a7368adc2290ae17ffd82a070caadd20 Mon Sep 17 00:00:00 2001 From: Mario Kleiner Date: Thu, 17 Jul 2014 02:24:45 +0200 Subject: [PATCH 241/268] drm/radeon: Make classic pageflip completion path less racy. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Need to protect mmio flip programming by event lock as well. Need to also first enable pflip irq, then mmio program, otherwise a flip completion may get unnoticed in the vblank of actual completion if the flip is programmed, but radeon_flip_work_func gets preempted immediately after mmio programming and before vblank. In that case the vblank irq handler wouldn't run radeon_crtc_handle_vblank() with the completion check routine, miss the completed flip, and only notice one vblank after actual completion, causing a false/delayed report of flip completion. Signed-off-by: Mario Kleiner Reviewed-by: Michel Dänzer Signed-off-by: Alex Deucher --- drivers/gpu/drm/radeon/radeon_display.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/radeon/radeon_display.c b/drivers/gpu/drm/radeon/radeon_display.c index 0a22a1bb688c5..bf25061c8ac4e 100644 --- a/drivers/gpu/drm/radeon/radeon_display.c +++ b/drivers/gpu/drm/radeon/radeon_display.c @@ -407,15 +407,15 @@ static void radeon_flip_work_func(struct work_struct *__work) radeon_fence_unref(&work->fence); } - /* do the flip (mmio) */ - radeon_page_flip(rdev, radeon_crtc->crtc_id, work->base); - /* We borrow the event spin lock for protecting flip_status */ spin_lock_irqsave(&crtc->dev->event_lock, flags); /* set the proper interrupt */ radeon_irq_kms_pflip_irq_get(rdev, radeon_crtc->crtc_id); + /* do the flip (mmio) */ + radeon_page_flip(rdev, radeon_crtc->crtc_id, work->base); + radeon_crtc->flip_status = RADEON_FLIP_SUBMITTED; spin_unlock_irqrestore(&crtc->dev->event_lock, flags); up_read(&rdev->exclusive_lock); From 953c66469735aed8d2ada639a72b150f01dae605 Mon Sep 17 00:00:00 2001 From: Abbas Raza Date: Thu, 17 Jul 2014 19:34:31 +0800 Subject: [PATCH 242/268] usb: chipidea: udc: Disable auto ZLP generation on ep0 There are 2 methods for ZLP (zero-length packet) generation: 1) In software 2) Automatic generation by device controller 1) is implemented in UDC driver and it attaches ZLP to IN packet if descriptor->size < wLength 2) can be enabled/disabled by setting ZLT bit in the QH When gadget ffs is connected to ubuntu host, the host sends get descriptor request and wLength in setup packet is 255 while the size of descriptor which will be sent by gadget in IN packet is 64 byte. So the composite driver sets req->zero = 1. In UDC driver following code will be executed then if (hwreq->req.zero && hwreq->req.length && (hwreq->req.length % hwep->ep.maxpacket == 0)) add_td_to_list(hwep, hwreq, 0); Case-A: So in case of ubuntu host, UDC driver will attach a ZLP to the IN packet. ubuntu host will request 255 byte in IN request, gadget will send 64 byte with ZLP and host will come to know that there is no more data. But hold on, by default ZLT=0 for endpoint 0 so hardware also tries to automatically generate the ZLP which blocks enumeration for ~6 seconds due to endpoint 0 STALL, NAKs are sent to host for any requests (OUT/PING) Case-B: In case when gadget ffs is connected to Apple device, Apple device sends setup packet with wLength=64. So descriptor->size = 64 and wLength=64 therefore req->zero = 0 and UDC driver will not attach any ZLP to the IN packet. Apple device requests 64 bytes, gets 64 bytes and doesn't further request for IN data. But ZLT=0 by default for endpoint 0 so hardware tries to automatically generate the ZLP which blocks enumeration for ~6 seconds due to endpoint 0 STALL, NAKs are sent to host for any requests (OUT/PING) According to USB2.0 specs: 8.5.3.2 Variable-length Data Stage A control pipe may have a variable-length data phase in which the host requests more data than is contained in the specified data structure. When all of the data structure is returned to the host, the function should indicate that the Data stage is ended by returning a packet that is shorter than the MaxPacketSize for the pipe. If the data structure is an exact multiple of wMaxPacketSize for the pipe, the function will return a zero-length packet to indicate the end of the Data stage. In Case-A mentioned above: If we disable software ZLP generation & ZLT=0 for endpoint 0 OR if software ZLP generation is not disabled but we set ZLT=1 for endpoint 0 then enumeration doesn't block for 6 seconds. In Case-B mentioned above: If we disable software ZLP generation & ZLT=0 for endpoint then enumeration still blocks due to ZLP automatically generated by hardware and host not needing it. But if we keep software ZLP generation enabled but we set ZLT=1 for endpoint 0 then enumeration doesn't block for 6 seconds. So the proper solution for this issue seems to disable automatic ZLP generation by hardware (i.e by setting ZLT=1 for endpoint 0) and let software (UDC driver) handle the ZLP generation based on req->zero field. Cc: stable@vger.kernel.org Signed-off-by: Abbas Raza Signed-off-by: Peter Chen Signed-off-by: Greg Kroah-Hartman --- drivers/usb/chipidea/udc.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/usb/chipidea/udc.c b/drivers/usb/chipidea/udc.c index 9d2b673f90e30..b8125aa64ad8c 100644 --- a/drivers/usb/chipidea/udc.c +++ b/drivers/usb/chipidea/udc.c @@ -1169,8 +1169,8 @@ static int ep_enable(struct usb_ep *ep, if (hwep->type == USB_ENDPOINT_XFER_CONTROL) cap |= QH_IOS; - if (hwep->num) - cap |= QH_ZLT; + + cap |= QH_ZLT; cap |= (hwep->ep.maxpacket << __ffs(QH_MAX_PKT)) & QH_MAX_PKT; /* * For ISO-TX, we set mult at QH as the largest value, and use From bb86cf569bbd7ad4dce581a37c7fbd748057e9dc Mon Sep 17 00:00:00 2001 From: Gavin Guo Date: Fri, 18 Jul 2014 01:12:13 +0800 Subject: [PATCH 243/268] usb: Check if port status is equal to RxDetect When using USB 3.0 pen drive with the [AMD] FCH USB XHCI Controller [1022:7814], the second hotplugging will experience the USB 3.0 pen drive is recognized as high-speed device. After bisecting the kernel, I found the commit number 41e7e056cdc662f704fa9262e5c6e213b4ab45dd (USB: Allow USB 3.0 ports to be disabled.) causes the bug. After doing some experiments, the bug can be fixed by avoiding executing the function hub_usb3_port_disable(). Because the port status with [AMD] FCH USB XHCI Controlleris [1022:7814] is already in RxDetect (I tried printing out the port status before setting to Disabled state), it's reasonable to check the port status before really executing hub_usb3_port_disable(). Fixes: 41e7e056cdc6 (USB: Allow USB 3.0 ports to be disabled.) Signed-off-by: Gavin Guo Acked-by: Alan Stern Cc: Signed-off-by: Greg Kroah-Hartman --- drivers/usb/core/hub.c | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/drivers/usb/core/hub.c b/drivers/usb/core/hub.c index 21b99b4b4082e..0e950ad8cb252 100644 --- a/drivers/usb/core/hub.c +++ b/drivers/usb/core/hub.c @@ -889,6 +889,25 @@ static int hub_usb3_port_disable(struct usb_hub *hub, int port1) if (!hub_is_superspeed(hub->hdev)) return -EINVAL; + ret = hub_port_status(hub, port1, &portstatus, &portchange); + if (ret < 0) + return ret; + + /* + * USB controller Advanced Micro Devices, Inc. [AMD] FCH USB XHCI + * Controller [1022:7814] will have spurious result making the following + * usb 3.0 device hotplugging route to the 2.0 root hub and recognized + * as high-speed device if we set the usb 3.0 port link state to + * Disabled. Since it's already in USB_SS_PORT_LS_RX_DETECT state, we + * check the state here to avoid the bug. + */ + if ((portstatus & USB_PORT_STAT_LINK_STATE) == + USB_SS_PORT_LS_RX_DETECT) { + dev_dbg(&hub->ports[port1 - 1]->dev, + "Not disabling port; link state is RxDetect\n"); + return ret; + } + ret = hub_set_port_link_state(hub, port1, USB_SS_PORT_LS_SS_DISABLED); if (ret) return ret; From 2b1987a9f1e6250c962ca13820d3c69817879266 Mon Sep 17 00:00:00 2001 From: Brian W Hart Date: Fri, 27 Jun 2014 16:09:39 -0500 Subject: [PATCH 244/268] cpufreq: make table sentinel macros unsigned to match use MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Commit 5eeaf1f18973 (cpufreq: Fix build error on some platforms that use cpufreq_for_each_*) moved function cpufreq_next_valid() to a public header. Warnings are now generated when objects including that header are built with -Wsign-compare (as an out-of-tree module might be): .../include/linux/cpufreq.h: In function ‘cpufreq_next_valid’: .../include/linux/cpufreq.h:519:27: warning: comparison between signed and unsigned integer expressions [-Wsign-compare] while ((*pos)->frequency != CPUFREQ_TABLE_END) ^ .../include/linux/cpufreq.h:520:25: warning: comparison between signed and unsigned integer expressions [-Wsign-compare] if ((*pos)->frequency != CPUFREQ_ENTRY_INVALID) ^ Constants CPUFREQ_ENTRY_INVALID and CPUFREQ_TABLE_END are signed, but are used with unsigned member 'frequency' of cpufreq_frequency_table. Update the macro definitions to be explicitly unsigned to match their use. This also corrects potentially wrong behavior of clk_rate_table_iter() if unsigned long is wider than usigned int. Fixes: 5eeaf1f18973 (cpufreq: Fix build error on some platforms that use cpufreq_for_each_*) Signed-off-by: Brian W Hart Reviewed-by: Simon Horman Acked-by: Viresh Kumar Signed-off-by: Rafael J. Wysocki --- include/linux/cpufreq.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h index ec4112d257bca..8f8ae95c6e279 100644 --- a/include/linux/cpufreq.h +++ b/include/linux/cpufreq.h @@ -482,8 +482,8 @@ extern struct cpufreq_governor cpufreq_gov_conservative; *********************************************************************/ /* Special Values of .frequency field */ -#define CPUFREQ_ENTRY_INVALID ~0 -#define CPUFREQ_TABLE_END ~1 +#define CPUFREQ_ENTRY_INVALID ~0u +#define CPUFREQ_TABLE_END ~1u /* Special Values of .flags field */ #define CPUFREQ_BOOST_FREQ (1 << 0) From 2ef82d24f445e82f80e235f44eb9d1bc933e3670 Mon Sep 17 00:00:00 2001 From: Dexuan Cui Date: Wed, 16 Jul 2014 00:00:45 -0700 Subject: [PATCH 245/268] Drivers: hv: hv_fcopy: fix a race condition for SMP guest We should schedule the 5s "timer work" before starting the data transfer, otherwise, the data transfer code may finish so fast on another virtual cpu that when the code(fcopy_write()) trying to cancel the 5s "timer work" can occasionally fail because the "timer work" may haven't been scheduled yet and as a result the fcopy process will be aborted wrongly by fcopy_work_func() in 5s. Thank Liz Zhang for the initial investigation on the bug. This addresses https://bugzilla.redhat.com/show_bug.cgi?id=1118123 Tested-by: Liz Zhang Cc: Haiyang Zhang Cc: stable@vger.kernel.org Signed-off-by: Dexuan Cui Signed-off-by: K. Y. Srinivasan Signed-off-by: Greg Kroah-Hartman --- drivers/hv/hv_fcopy.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/hv/hv_fcopy.c b/drivers/hv/hv_fcopy.c index eaaa3d843b805..23b2ce294c4ca 100644 --- a/drivers/hv/hv_fcopy.c +++ b/drivers/hv/hv_fcopy.c @@ -246,8 +246,8 @@ void hv_fcopy_onchannelcallback(void *context) /* * Send the information to the user-level daemon. */ - fcopy_send_data(); schedule_delayed_work(&fcopy_work, 5*HZ); + fcopy_send_data(); return; } icmsghdr->icflags = ICMSGHDRFLAG_TRANSACTION | ICMSGHDRFLAG_RESPONSE; From 03e97220b99b8b691ea5b130b7b4c135c9662792 Mon Sep 17 00:00:00 2001 From: Lucas Stach Date: Thu, 17 Jul 2014 12:20:14 +0200 Subject: [PATCH 246/268] ARM: clk-imx6q: parent lvds_sel input from upstream clock gates The i.MX6 reference manual doesn't make a clear distinction between the fixed clock divider and the enable gate for the pcie and sata reference clocks. This lead to the lvds mux inputs in the imx6q clk driver to be parented from the ref clock (which is the divider) instead of the actual gate, which in turn prevents the upstream clock to actually be enabled when lvds clk out is active. This fixes a hard machine hang regression in kernel 3.16 for boards where only pcie is active but no sata, as with this kernel version the imx6-pcie driver is no longer enabling the upstream clock directly but only lvds clk out. Reported-by: Arne Ruhnau Signed-off-by: Lucas Stach Tested-by: Arne Ruhnau Signed-off-by: Shawn Guo --- arch/arm/mach-imx/clk-imx6q.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm/mach-imx/clk-imx6q.c b/arch/arm/mach-imx/clk-imx6q.c index 8e795dea02ece..8556c787e59ca 100644 --- a/arch/arm/mach-imx/clk-imx6q.c +++ b/arch/arm/mach-imx/clk-imx6q.c @@ -70,7 +70,7 @@ static const char *cko_sels[] = { "cko1", "cko2", }; static const char *lvds_sels[] = { "dummy", "dummy", "dummy", "dummy", "dummy", "dummy", "pll4_audio", "pll5_video", "pll8_mlb", "enet_ref", - "pcie_ref", "sata_ref", + "pcie_ref_125m", "sata_ref_100m", }; enum mx6q_clks { @@ -491,7 +491,7 @@ static void __init imx6q_clocks_init(struct device_node *ccm_node) /* All existing boards with PCIe use LVDS1 */ if (IS_ENABLED(CONFIG_PCI_IMX6)) - clk_set_parent(clk[lvds1_sel], clk[sata_ref]); + clk_set_parent(clk[lvds1_sel], clk[sata_ref_100m]); /* Set initial power mode */ imx6q_set_lpm(WAIT_CLOCKED); From 79272b3562bb44ce7dc720cd13136f5a4a53c618 Mon Sep 17 00:00:00 2001 From: Bob Peterson Date: Fri, 20 Jun 2014 09:36:41 -0400 Subject: [PATCH 247/268] GFS2: Only wait for demote when last holder is dequeued Function gfs2_glock_dq_wait is supposed to dequeue a glock and then wait for the lock to be demoted. The problem is, if this is a shared lock, its demote will depend on the other holders, which means you might end up waiting forever because the other process is blocked. This problem is especially apparent when dealing with nested flocks. Signed-off-by: Bob Peterson Signed-off-by: Steven Whitehouse --- fs/gfs2/glock.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c index c355f7320e448..278fae5b6982f 100644 --- a/fs/gfs2/glock.c +++ b/fs/gfs2/glock.c @@ -1128,7 +1128,9 @@ void gfs2_glock_dq_wait(struct gfs2_holder *gh) struct gfs2_glock *gl = gh->gh_gl; gfs2_glock_dq(gh); might_sleep(); - wait_on_bit(&gl->gl_flags, GLF_DEMOTE, gfs2_glock_demote_wait, TASK_UNINTERRUPTIBLE); + if (!find_first_holder(gl)) + wait_on_bit(&gl->gl_flags, GLF_DEMOTE, gfs2_glock_demote_wait, + TASK_UNINTERRUPTIBLE); } /** From 94a09a3999ee978e097b5aad74034ed43bae56db Mon Sep 17 00:00:00 2001 From: Steven Whitehouse Date: Mon, 23 Jun 2014 14:43:32 +0100 Subject: [PATCH 248/268] GFS2: Fix race in glock lru glock disposal We must not leave items on the LRU list with GLF_LOCK set, since they can be removed if the glock is brought back into use, which may then potentially result in a hang, waiting for GLF_LOCK to clear. It doesn't happen very often, since it requires a glock that has not been used for a long time to be brought back into use at the same moment that the shrinker is part way through disposing of glocks. The fix is to set GLF_LOCK at a later time, when we already know that the other locks can be obtained. Also, we now only release the lru_lock in case a resched is needed, rather than on every iteration. Signed-off-by: Steven Whitehouse --- fs/gfs2/glock.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c index 278fae5b6982f..c1e5b126d2ca9 100644 --- a/fs/gfs2/glock.c +++ b/fs/gfs2/glock.c @@ -1406,12 +1406,16 @@ __acquires(&lru_lock) gl = list_entry(list->next, struct gfs2_glock, gl_lru); list_del_init(&gl->gl_lru); if (!spin_trylock(&gl->gl_spin)) { +add_back_to_lru: list_add(&gl->gl_lru, &lru_list); atomic_inc(&lru_count); continue; } + if (test_and_set_bit(GLF_LOCK, &gl->gl_flags)) { + spin_unlock(&gl->gl_spin); + goto add_back_to_lru; + } clear_bit(GLF_LRU, &gl->gl_flags); - spin_unlock(&lru_lock); gl->gl_lockref.count++; if (demote_ok(gl)) handle_callback(gl, LM_ST_UNLOCKED, 0, false); @@ -1419,7 +1423,7 @@ __acquires(&lru_lock) if (queue_delayed_work(glock_workqueue, &gl->gl_work, 0) == 0) gl->gl_lockref.count--; spin_unlock(&gl->gl_spin); - spin_lock(&lru_lock); + cond_resched_lock(&lru_lock); } } @@ -1444,7 +1448,7 @@ static long gfs2_scan_glock_lru(int nr) gl = list_entry(lru_list.next, struct gfs2_glock, gl_lru); /* Test for being demotable */ - if (!test_and_set_bit(GLF_LOCK, &gl->gl_flags)) { + if (!test_bit(GLF_LOCK, &gl->gl_flags)) { list_move(&gl->gl_lru, &dispose); atomic_dec(&lru_count); freed++; From fe0bbd2986996b9efe3a78bf5a591b0496c7afea Mon Sep 17 00:00:00 2001 From: Steven Whitehouse Date: Mon, 23 Jun 2014 14:50:20 +0100 Subject: [PATCH 249/268] GFS2: Use GFP_NOFS when allocating glocks Normally GFP_KERNEL is ok here, but there is now a rarely used code path relating to deallocation of unlinked inodes (in certain corner cases) which if hit at times of memory shortage can cause recursion while trying to free memory. One solution would be to try and move the gfs2_glock_get() call so that it is no longer called while another glock is held, but that doesn't look at all easy, so GFP_NOFS is the best solution for the time being. Signed-off-by: Steven Whitehouse --- fs/gfs2/glock.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c index c1e5b126d2ca9..b703dcc91588f 100644 --- a/fs/gfs2/glock.c +++ b/fs/gfs2/glock.c @@ -731,14 +731,14 @@ int gfs2_glock_get(struct gfs2_sbd *sdp, u64 number, cachep = gfs2_glock_aspace_cachep; else cachep = gfs2_glock_cachep; - gl = kmem_cache_alloc(cachep, GFP_KERNEL); + gl = kmem_cache_alloc(cachep, GFP_NOFS); if (!gl) return -ENOMEM; memset(&gl->gl_lksb, 0, sizeof(struct dlm_lksb)); if (glops->go_flags & GLOF_LVB) { - gl->gl_lksb.sb_lvbptr = kzalloc(GFS2_MIN_LVB_SIZE, GFP_KERNEL); + gl->gl_lksb.sb_lvbptr = kzalloc(GFS2_MIN_LVB_SIZE, GFP_NOFS); if (!gl->gl_lksb.sb_lvbptr) { kmem_cache_free(cachep, gl); return -ENOMEM; From 6ec43b1838bd71633ac3f853c63ddf1f5940b1ed Mon Sep 17 00:00:00 2001 From: Fabian Frederick Date: Wed, 25 Jun 2014 20:40:45 +0200 Subject: [PATCH 250/268] GFS2: replace count*size kzalloc by kcalloc kcalloc manages count*sizeof overflow. Cc: cluster-devel@redhat.com Signed-off-by: Fabian Frederick Signed-off-by: Steven Whitehouse --- fs/gfs2/lock_dlm.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/gfs2/lock_dlm.c b/fs/gfs2/lock_dlm.c index 91f274de1246c..4fafea1c9ecf1 100644 --- a/fs/gfs2/lock_dlm.c +++ b/fs/gfs2/lock_dlm.c @@ -1036,8 +1036,8 @@ static int set_recover_size(struct gfs2_sbd *sdp, struct dlm_slot *slots, new_size = old_size + RECOVER_SIZE_INC; - submit = kzalloc(new_size * sizeof(uint32_t), GFP_NOFS); - result = kzalloc(new_size * sizeof(uint32_t), GFP_NOFS); + submit = kcalloc(new_size, sizeof(uint32_t), GFP_NOFS); + result = kcalloc(new_size, sizeof(uint32_t), GFP_NOFS); if (!submit || !result) { kfree(submit); kfree(result); From 5bef3e7cf18c56cc733777c61b6b61a0b8a62b35 Mon Sep 17 00:00:00 2001 From: Bob Peterson Date: Thu, 26 Jun 2014 10:46:25 -0400 Subject: [PATCH 251/268] GFS2: Allow flocks to use normal glock dq rather than dq_wait This patch allows flock glocks to use a non-blocking dequeue rather than dq_wait. It also reverts the previous patch I had posted regarding dq_wait. The reverted patch isn't necessarily a bad idea, but I decided this might avoid unforeseen side effects, and was therefore safer. Signed-off-by: Bob Peterson Signed-off-by: Steven Whitehouse --- fs/gfs2/file.c | 2 +- fs/gfs2/glock.c | 4 +--- 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/fs/gfs2/file.c b/fs/gfs2/file.c index 4fc3a3046174d..491e8e023598c 100644 --- a/fs/gfs2/file.c +++ b/fs/gfs2/file.c @@ -991,7 +991,7 @@ static int do_flock(struct file *file, int cmd, struct file_lock *fl) goto out; flock_lock_file_wait(file, &(struct file_lock){.fl_type = F_UNLCK}); - gfs2_glock_dq_wait(fl_gh); + gfs2_glock_dq(fl_gh); gfs2_holder_reinit(state, flags, fl_gh); } else { error = gfs2_glock_get(GFS2_SB(&ip->i_inode), ip->i_no_addr, diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c index b703dcc91588f..ee4e04fe60fc5 100644 --- a/fs/gfs2/glock.c +++ b/fs/gfs2/glock.c @@ -1128,9 +1128,7 @@ void gfs2_glock_dq_wait(struct gfs2_holder *gh) struct gfs2_glock *gl = gh->gh_gl; gfs2_glock_dq(gh); might_sleep(); - if (!find_first_holder(gl)) - wait_on_bit(&gl->gl_flags, GLF_DEMOTE, gfs2_glock_demote_wait, - TASK_UNINTERRUPTIBLE); + wait_on_bit(&gl->gl_flags, GLF_DEMOTE, gfs2_glock_demote_wait, TASK_UNINTERRUPTIBLE); } /** From 97a4f1d7653684fff0d50e9328917506f06e9d79 Mon Sep 17 00:00:00 2001 From: Bob Peterson Date: Thu, 26 Jun 2014 10:47:48 -0400 Subject: [PATCH 252/268] GFS2: Allow caching of glocks for flock This patch removes the GLF_NOCACHE flag from the glocks associated with flocks. There should be no good reason not to cache glocks for flocks: they only force the glock to be demoted before they can be reacquired, which can slow down performance and even cause glock hangs, especially in cases where the flocks are held in Shared (SH) mode. Signed-off-by: Bob Peterson Signed-off-by: Steven Whitehouse --- fs/gfs2/file.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/gfs2/file.c b/fs/gfs2/file.c index 491e8e023598c..26b3f952e6b19 100644 --- a/fs/gfs2/file.c +++ b/fs/gfs2/file.c @@ -981,7 +981,7 @@ static int do_flock(struct file *file, int cmd, struct file_lock *fl) int error = 0; state = (fl->fl_type == F_WRLCK) ? LM_ST_EXCLUSIVE : LM_ST_SHARED; - flags = (IS_SETLKW(cmd) ? 0 : LM_FLAG_TRY) | GL_EXACT | GL_NOCACHE; + flags = (IS_SETLKW(cmd) ? 0 : LM_FLAG_TRY) | GL_EXACT; mutex_lock(&fp->f_fl_mutex); From 6b49d1d9c3c1088758c6a2758aaa5d236ef609e2 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Sun, 29 Jun 2014 12:21:39 +0200 Subject: [PATCH 253/268] GFS2: memcontrol: Spelling s/invlidate/invalidate/ Signed-off-by: Geert Uytterhoeven Cc: cluster-devel@redhat.com Signed-off-by: Steven Whitehouse --- fs/gfs2/glops.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/gfs2/glops.c b/fs/gfs2/glops.c index fc1100781bbc1..2ffc67dce87f2 100644 --- a/fs/gfs2/glops.c +++ b/fs/gfs2/glops.c @@ -234,8 +234,8 @@ static void inode_go_sync(struct gfs2_glock *gl) * inode_go_inval - prepare a inode glock to be released * @gl: the glock * @flags: - * - * Normally we invlidate everything, but if we are moving into + * + * Normally we invalidate everything, but if we are moving into * LM_ST_DEFERRED from LM_ST_SHARED or LM_ST_EXCLUSIVE then we * can keep hold of the metadata, since it won't have changed. * From 27ff6a0f7f5bf500e9d2a8760c062789b52c551f Mon Sep 17 00:00:00 2001 From: Fabian Frederick Date: Wed, 2 Jul 2014 22:05:27 +0200 Subject: [PATCH 254/268] GFS2: fs/gfs2/rgrp.c: kernel-doc warning fixes Cc: cluster-devel@redhat.com Signed-off-by: Fabian Frederick Signed-off-by: Steven Whitehouse --- fs/gfs2/rgrp.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c index db629d1bd1bd9..f4cb9c0d6bbdc 100644 --- a/fs/gfs2/rgrp.c +++ b/fs/gfs2/rgrp.c @@ -337,7 +337,7 @@ static bool gfs2_unaligned_extlen(struct gfs2_rbm *rbm, u32 n_unaligned, u32 *le /** * gfs2_free_extlen - Return extent length of free blocks - * @rbm: Starting position + * @rrbm: Starting position * @len: Max length to check * * Starting at the block specified by the rbm, see how many free blocks @@ -2522,7 +2522,7 @@ void gfs2_rlist_alloc(struct gfs2_rgrp_list *rlist, unsigned int state) /** * gfs2_rlist_free - free a resource group list - * @list: the list of resource groups + * @rlist: the list of resource groups * */ From 29e697b11853d3f83b1864ae385abdad4aa2c361 Mon Sep 17 00:00:00 2001 From: Tomasz Figa Date: Thu, 17 Jul 2014 17:23:44 +0200 Subject: [PATCH 255/268] irqchip: gic: Fix core ID calculation when topology is read from DT Certain GIC implementation, namely those found on earlier, single cluster, Exynos SoCs, have registers mapped without per-CPU banking, which means that the driver needs to use different offset for each CPU. Currently the driver calculates the offset by multiplying value returned by cpu_logical_map() by CPU offset parsed from DT. This is correct when CPU topology is not specified in DT and aforementioned function returns core ID alone. However when DT contains CPU topology, the function changes to return cluster ID as well, which is non-zero on mentioned SoCs and so breaks the calculation in GIC driver. This patch fixes this by masking out cluster ID in CPU offset calculation so that only core ID is considered. Multi-cluster Exynos SoCs already have banked GIC implementations, so this simple fix should be enough. Reported-by: Lorenzo Pieralisi Reported-by: Bartlomiej Zolnierkiewicz Signed-off-by: Tomasz Figa Fixes: db0d4db22a78d ("ARM: gic: allow GIC to support non-banked setups") Cc: # v3.3+ Link: https://lkml.kernel.org/r/1405610624-18722-1-git-send-email-t.figa@samsung.com Signed-off-by: Jason Cooper --- drivers/irqchip/irq-gic.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/irqchip/irq-gic.c b/drivers/irqchip/irq-gic.c index 66ed8922dab22..7c131cf7cc132 100644 --- a/drivers/irqchip/irq-gic.c +++ b/drivers/irqchip/irq-gic.c @@ -42,6 +42,7 @@ #include #include +#include #include #include #include @@ -954,7 +955,9 @@ void __init gic_init_bases(unsigned int gic_nr, int irq_start, } for_each_possible_cpu(cpu) { - unsigned long offset = percpu_offset * cpu_logical_map(cpu); + u32 mpidr = cpu_logical_map(cpu); + u32 core_id = MPIDR_AFFINITY_LEVEL(mpidr, 0); + unsigned long offset = percpu_offset * core_id; *per_cpu_ptr(gic->dist_base.percpu_base, cpu) = dist_base + offset; *per_cpu_ptr(gic->cpu_base.percpu_base, cpu) = cpu_base + offset; } From dba1fd0bff38966f16bbe194fb451f73ddaafb58 Mon Sep 17 00:00:00 2001 From: Bo Shen Date: Mon, 14 Jul 2014 11:08:14 +0800 Subject: [PATCH 256/268] ARM: at91: at91sam9x5: correct typo error for ohci clock Correct the typo error for the second "uhphs_clk". Signed-off-by: Bo Shen Acked-by: Boris Brezillon Signed-off-by: Nicolas Ferre --- arch/arm/boot/dts/at91sam9x5.dtsi | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/arch/arm/boot/dts/at91sam9x5.dtsi b/arch/arm/boot/dts/at91sam9x5.dtsi index d6133f497207d..ae34c9ca4bba1 100644 --- a/arch/arm/boot/dts/at91sam9x5.dtsi +++ b/arch/arm/boot/dts/at91sam9x5.dtsi @@ -1153,8 +1153,7 @@ compatible = "atmel,at91rm9200-ohci", "usb-ohci"; reg = <0x00600000 0x100000>; interrupts = <22 IRQ_TYPE_LEVEL_HIGH 2>; - clocks = <&usb>, <&uhphs_clk>, <&udphs_clk>, - <&uhpck>; + clocks = <&usb>, <&uhphs_clk>, <&uhphs_clk>, <&uhpck>; clock-names = "usb_clk", "ohci_clk", "hclk", "uhpck"; status = "disabled"; }; From 043dfc1b624caf67a52412412a7ccce2d7d2b7f5 Mon Sep 17 00:00:00 2001 From: Boris BREZILLON Date: Mon, 14 Jul 2014 08:39:27 +0200 Subject: [PATCH 257/268] ARM: at91/dt: fix usb0 clocks definition in sam9n12 dtsi udphs_clk (USB Device Controller clock) is referenced instead of uhphs_clk (USB Host Controller clock). Signed-off-by: Boris BREZILLON Acked-by: Alexandre Belloni Signed-off-by: Nicolas Ferre --- arch/arm/boot/dts/at91sam9n12.dtsi | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/boot/dts/at91sam9n12.dtsi b/arch/arm/boot/dts/at91sam9n12.dtsi index 287795985e32f..b84bac5bada40 100644 --- a/arch/arm/boot/dts/at91sam9n12.dtsi +++ b/arch/arm/boot/dts/at91sam9n12.dtsi @@ -925,7 +925,7 @@ compatible = "atmel,at91rm9200-ohci", "usb-ohci"; reg = <0x00500000 0x00100000>; interrupts = <22 IRQ_TYPE_LEVEL_HIGH 2>; - clocks = <&usb>, <&uhphs_clk>, <&udphs_clk>, + clocks = <&usb>, <&uhphs_clk>, <&uhphs_clk>, <&uhpck>; clock-names = "usb_clk", "ohci_clk", "hclk", "uhpck"; status = "disabled"; From e0d69e119fc6bf7cc3c9f791478108c1b925bb2e Mon Sep 17 00:00:00 2001 From: Boris BREZILLON Date: Thu, 17 Jul 2014 21:03:58 +0200 Subject: [PATCH 258/268] ARM: at91/dt: add missing clocks property to pwm node in sam9x5.dtsi The pwm driver requires a clocks property referencing the pwm peripheral clk. Signed-off-by: Boris BREZILLON Signed-off-by: Nicolas Ferre --- arch/arm/boot/dts/at91sam9x5.dtsi | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm/boot/dts/at91sam9x5.dtsi b/arch/arm/boot/dts/at91sam9x5.dtsi index ae34c9ca4bba1..5ef716ddd5fef 100644 --- a/arch/arm/boot/dts/at91sam9x5.dtsi +++ b/arch/arm/boot/dts/at91sam9x5.dtsi @@ -1122,6 +1122,7 @@ compatible = "atmel,at91sam9rl-pwm"; reg = <0xf8034000 0x300>; interrupts = <18 IRQ_TYPE_LEVEL_HIGH 4>; + clocks = <&pwm_clk>; #pwm-cells = <3>; status = "disabled"; }; From b32bfc06aefab61acc872dec3222624e6cd867ed Mon Sep 17 00:00:00 2001 From: Romain Degez Date: Fri, 11 Jul 2014 18:08:13 +0200 Subject: [PATCH 259/268] ahci: add support for the Promise FastTrak TX8660 SATA HBA (ahci mode) Add support of the Promise FastTrak TX8660 SATA HBA in ahci mode by registering the board in the ahci_pci_tbl[]. Note: this HBA also provide a hardware RAID mode when activated in BIOS but specific drivers from the manufacturer are required in this case. Signed-off-by: Romain Degez Tested-by: Romain Degez Signed-off-by: Tejun Heo Cc: stable@vger.kernel.org --- drivers/ata/ahci.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/ata/ahci.c b/drivers/ata/ahci.c index dae5607e11153..4cd52a4541a96 100644 --- a/drivers/ata/ahci.c +++ b/drivers/ata/ahci.c @@ -456,6 +456,7 @@ static const struct pci_device_id ahci_pci_tbl[] = { /* Promise */ { PCI_VDEVICE(PROMISE, 0x3f20), board_ahci }, /* PDC42819 */ + { PCI_VDEVICE(PROMISE, 0x3781), board_ahci }, /* FastTrak TX8660 ahci-mode */ /* Asmedia */ { PCI_VDEVICE(ASMEDIA, 0x0601), board_ahci }, /* ASM1060 */ From 9637f30e6b7bc394c08fa9d27d63622f141142e9 Mon Sep 17 00:00:00 2001 From: Tomasz Figa Date: Wed, 16 Jul 2014 02:59:18 +0900 Subject: [PATCH 260/268] ARM: EXYNOS: Fix core ID used by platsmp and hotplug code When CPU topology is specified in device tree, cpu_logical_map() does not return core ID anymore, but rather full MPIDR value. This breaks existing calculation of PMU register offsets on Exynos SoCs. This patch fixes the problem by adjusting the code to use only core ID bits of the value returned by cpu_logical_map() to allow CPU topology to be specified in device tree on Exynos SoCs. Signed-off-by: Tomasz Figa Signed-off-by: Kukjin Kim Signed-off-by: Olof Johansson --- arch/arm/mach-exynos/hotplug.c | 10 ++++++---- arch/arm/mach-exynos/platsmp.c | 34 +++++++++++++++++++--------------- 2 files changed, 25 insertions(+), 19 deletions(-) diff --git a/arch/arm/mach-exynos/hotplug.c b/arch/arm/mach-exynos/hotplug.c index 8a134d019cb3a..920a4baa53cd7 100644 --- a/arch/arm/mach-exynos/hotplug.c +++ b/arch/arm/mach-exynos/hotplug.c @@ -40,15 +40,17 @@ static inline void cpu_leave_lowpower(void) static inline void platform_do_lowpower(unsigned int cpu, int *spurious) { + u32 mpidr = cpu_logical_map(cpu); + u32 core_id = MPIDR_AFFINITY_LEVEL(mpidr, 0); + for (;;) { - /* make cpu1 to be turned off at next WFI command */ - if (cpu == 1) - exynos_cpu_power_down(cpu); + /* Turn the CPU off on next WFI instruction. */ + exynos_cpu_power_down(core_id); wfi(); - if (pen_release == cpu_logical_map(cpu)) { + if (pen_release == core_id) { /* * OK, proper wakeup, we're done */ diff --git a/arch/arm/mach-exynos/platsmp.c b/arch/arm/mach-exynos/platsmp.c index 1c8d31e395200..50b9aad5e27b7 100644 --- a/arch/arm/mach-exynos/platsmp.c +++ b/arch/arm/mach-exynos/platsmp.c @@ -90,7 +90,8 @@ static void exynos_secondary_init(unsigned int cpu) static int exynos_boot_secondary(unsigned int cpu, struct task_struct *idle) { unsigned long timeout; - unsigned long phys_cpu = cpu_logical_map(cpu); + u32 mpidr = cpu_logical_map(cpu); + u32 core_id = MPIDR_AFFINITY_LEVEL(mpidr, 0); int ret = -ENOSYS; /* @@ -104,17 +105,18 @@ static int exynos_boot_secondary(unsigned int cpu, struct task_struct *idle) * the holding pen - release it, then wait for it to flag * that it has been released by resetting pen_release. * - * Note that "pen_release" is the hardware CPU ID, whereas + * Note that "pen_release" is the hardware CPU core ID, whereas * "cpu" is Linux's internal ID. */ - write_pen_release(phys_cpu); + write_pen_release(core_id); - if (!exynos_cpu_power_state(cpu)) { - exynos_cpu_power_up(cpu); + if (!exynos_cpu_power_state(core_id)) { + exynos_cpu_power_up(core_id); timeout = 10; /* wait max 10 ms until cpu1 is on */ - while (exynos_cpu_power_state(cpu) != S5P_CORE_LOCAL_PWR_EN) { + while (exynos_cpu_power_state(core_id) + != S5P_CORE_LOCAL_PWR_EN) { if (timeout-- == 0) break; @@ -145,20 +147,20 @@ static int exynos_boot_secondary(unsigned int cpu, struct task_struct *idle) * Try to set boot address using firmware first * and fall back to boot register if it fails. */ - ret = call_firmware_op(set_cpu_boot_addr, phys_cpu, boot_addr); + ret = call_firmware_op(set_cpu_boot_addr, core_id, boot_addr); if (ret && ret != -ENOSYS) goto fail; if (ret == -ENOSYS) { - void __iomem *boot_reg = cpu_boot_reg(phys_cpu); + void __iomem *boot_reg = cpu_boot_reg(core_id); if (IS_ERR(boot_reg)) { ret = PTR_ERR(boot_reg); goto fail; } - __raw_writel(boot_addr, cpu_boot_reg(phys_cpu)); + __raw_writel(boot_addr, cpu_boot_reg(core_id)); } - call_firmware_op(cpu_boot, phys_cpu); + call_firmware_op(cpu_boot, core_id); arch_send_wakeup_ipi_mask(cpumask_of(cpu)); @@ -227,22 +229,24 @@ static void __init exynos_smp_prepare_cpus(unsigned int max_cpus) * boot register if it fails. */ for (i = 1; i < max_cpus; ++i) { - unsigned long phys_cpu; unsigned long boot_addr; + u32 mpidr; + u32 core_id; int ret; - phys_cpu = cpu_logical_map(i); + mpidr = cpu_logical_map(i); + core_id = MPIDR_AFFINITY_LEVEL(mpidr, 0); boot_addr = virt_to_phys(exynos4_secondary_startup); - ret = call_firmware_op(set_cpu_boot_addr, phys_cpu, boot_addr); + ret = call_firmware_op(set_cpu_boot_addr, core_id, boot_addr); if (ret && ret != -ENOSYS) break; if (ret == -ENOSYS) { - void __iomem *boot_reg = cpu_boot_reg(phys_cpu); + void __iomem *boot_reg = cpu_boot_reg(core_id); if (IS_ERR(boot_reg)) break; - __raw_writel(boot_addr, cpu_boot_reg(phys_cpu)); + __raw_writel(boot_addr, cpu_boot_reg(core_id)); } } } From 79a8468747c5f95ed3d5ce8376a3e82e0c5857fc Mon Sep 17 00:00:00 2001 From: Hannes Frederic Sowa Date: Fri, 18 Jul 2014 17:26:41 -0400 Subject: [PATCH 261/268] random: check for increase of entropy_count because of signed conversion The expression entropy_count -= ibytes << (ENTROPY_SHIFT + 3) could actually increase entropy_count if during assignment of the unsigned expression on the RHS (mind the -=) we reduce the value modulo 2^width(int) and assign it to entropy_count. Trinity found this. [ Commit modified by tytso to add an additional safety check for a negative entropy_count -- which should never happen, and to also add an additional paranoia check to prevent overly large count values to be passed into urandom_read(). ] Reported-by: Dave Jones Signed-off-by: Hannes Frederic Sowa Signed-off-by: Theodore Ts'o Cc: stable@vger.kernel.org --- drivers/char/random.c | 17 ++++++++++++++--- 1 file changed, 14 insertions(+), 3 deletions(-) diff --git a/drivers/char/random.c b/drivers/char/random.c index 0a7ac0a7b2520..71529e196b847 100644 --- a/drivers/char/random.c +++ b/drivers/char/random.c @@ -641,7 +641,7 @@ static void credit_entropy_bits(struct entropy_store *r, int nbits) } while (unlikely(entropy_count < pool_size-2 && pnfrac)); } - if (entropy_count < 0) { + if (unlikely(entropy_count < 0)) { pr_warn("random: negative entropy/overflow: pool %s count %d\n", r->name, entropy_count); WARN_ON(1); @@ -981,7 +981,7 @@ static size_t account(struct entropy_store *r, size_t nbytes, int min, int reserved) { int entropy_count, orig; - size_t ibytes; + size_t ibytes, nfrac; BUG_ON(r->entropy_count > r->poolinfo->poolfracbits); @@ -999,7 +999,17 @@ static size_t account(struct entropy_store *r, size_t nbytes, int min, } if (ibytes < min) ibytes = 0; - if ((entropy_count -= ibytes << (ENTROPY_SHIFT + 3)) < 0) + + if (unlikely(entropy_count < 0)) { + pr_warn("random: negative entropy count: pool %s count %d\n", + r->name, entropy_count); + WARN_ON(1); + entropy_count = 0; + } + nfrac = ibytes << (ENTROPY_SHIFT + 3); + if ((size_t) entropy_count > nfrac) + entropy_count -= nfrac; + else entropy_count = 0; if (cmpxchg(&r->entropy_count, orig, entropy_count) != orig) @@ -1376,6 +1386,7 @@ urandom_read(struct file *file, char __user *buf, size_t nbytes, loff_t *ppos) "with %d bits of entropy available\n", current->comm, nonblocking_pool.entropy_total); + nbytes = min_t(size_t, nbytes, INT_MAX >> (ENTROPY_SHIFT + 3)); ret = extract_entropy_user(&nonblocking_pool, buf, nbytes); trace_urandom_read(8 * nbytes, ENTROPY_BITS(&nonblocking_pool), From 98ce2deda23a303682a4253f3016a1436f4b2735 Mon Sep 17 00:00:00 2001 From: Liu Bo Date: Thu, 17 Jul 2014 16:08:36 +0800 Subject: [PATCH 262/268] Btrfs: fix abnormal long waiting in fsync xfstests generic/127 detected this problem. With commit 7fc34a62ca4434a79c68e23e70ed26111b7a4cf8, now fsync will only flush data within the passed range. This is the cause of the above problem, -- btrfs's fsync has a stage called 'sync log' which will wait for all the ordered extents it've recorded to finish. In xfstests/generic/127, with mixed operations such as truncate, fallocate, punch hole, and mapwrite, we get some pre-allocated extents, and mapwrite will mmap, and then msync. And I find that msync will wait for quite a long time (about 20s in my case), thanks to ftrace, it turns out that the previous fallocate calls 'btrfs_wait_ordered_range()' to flush dirty pages, but as the range of dirty pages may be larger than 'btrfs_wait_ordered_range()' wants, there can be some ordered extents created but not getting corresponding pages flushed, then they're left in memory until we fsync which runs into the stage 'sync log', and fsync will just wait for the system writeback thread to flush those pages and get ordered extents finished, so the latency is inevitable. This adds a flush similar to btrfs_start_ordered_extent() in btrfs_wait_logged_extents() to fix that. Reviewed-by: Miao Xie Signed-off-by: Liu Bo Signed-off-by: Chris Mason --- fs/btrfs/ordered-data.c | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/fs/btrfs/ordered-data.c b/fs/btrfs/ordered-data.c index e12441c7cf1d6..7187b14faa6cd 100644 --- a/fs/btrfs/ordered-data.c +++ b/fs/btrfs/ordered-data.c @@ -484,8 +484,19 @@ void btrfs_wait_logged_extents(struct btrfs_root *log, u64 transid) log_list); list_del_init(&ordered->log_list); spin_unlock_irq(&log->log_extents_lock[index]); + + if (!test_bit(BTRFS_ORDERED_IO_DONE, &ordered->flags) && + !test_bit(BTRFS_ORDERED_DIRECT, &ordered->flags)) { + struct inode *inode = ordered->inode; + u64 start = ordered->file_offset; + u64 end = ordered->file_offset + ordered->len - 1; + + WARN_ON(!inode); + filemap_fdatawrite_range(inode->i_mapping, start, end); + } wait_event(ordered->wait, test_bit(BTRFS_ORDERED_IO_DONE, &ordered->flags)); + btrfs_put_ordered_extent(ordered); spin_lock_irq(&log->log_extents_lock[index]); } From 0bfaa9c5cb479cebc24979b384374fe47500b4c9 Mon Sep 17 00:00:00 2001 From: Eric Sandeen Date: Mon, 7 Jul 2014 12:34:49 -0500 Subject: [PATCH 263/268] btrfs: test for valid bdev before kobj removal in btrfs_rm_device commit 99994cd btrfs: dev delete should remove sysfs entry added a btrfs_kobj_rm_device, which dereferences device->bdev... right after we check whether device->bdev might be NULL. I don't honestly know if it's possible to have a NULL device->bdev here, but assuming that it is (given the test), we need to move the kobject removal to be under that test. (Coverity spotted this) Signed-off-by: Eric Sandeen Signed-off-by: Chris Mason --- fs/btrfs/volumes.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index 6104676857f5c..6cb82f62cb7c2 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -1680,11 +1680,11 @@ int btrfs_rm_device(struct btrfs_root *root, char *device_path) if (device->bdev == root->fs_info->fs_devices->latest_bdev) root->fs_info->fs_devices->latest_bdev = next_device->bdev; - if (device->bdev) + if (device->bdev) { device->fs_devices->open_devices--; - - /* remove sysfs entry */ - btrfs_kobj_rm_device(root->fs_info, device); + /* remove sysfs entry */ + btrfs_kobj_rm_device(root->fs_info, device); + } call_rcu(&device->rcu, free_device); From ae5db6d12341684913a78b6537c0b9c22c999b5c Mon Sep 17 00:00:00 2001 From: Richard Weinberger Date: Sun, 20 Jul 2014 12:56:34 +0200 Subject: [PATCH 264/268] Revert "um: Fix wait_stub_done() error handling" This reverts commit 0974a9cadc7886f7baaa458bb0c89f5c5f9d458e. The real for for that issue is to release current->mm->mmap_sem in fix_range_common(). Signed-off-by: Richard Weinberger --- arch/um/os-Linux/skas/process.c | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) diff --git a/arch/um/os-Linux/skas/process.c b/arch/um/os-Linux/skas/process.c index d531879a46176..908579f2b0ab1 100644 --- a/arch/um/os-Linux/skas/process.c +++ b/arch/um/os-Linux/skas/process.c @@ -54,7 +54,7 @@ static int ptrace_dump_regs(int pid) void wait_stub_done(int pid) { - int n, status, err, bad_stop = 0; + int n, status, err; while (1) { CATCH_EINTR(n = waitpid(pid, &status, WUNTRACED | __WALL)); @@ -74,8 +74,6 @@ void wait_stub_done(int pid) if (((1 << WSTOPSIG(status)) & STUB_DONE_MASK) != 0) return; - else - bad_stop = 1; bad_wait: err = ptrace_dump_regs(pid); @@ -85,10 +83,7 @@ void wait_stub_done(int pid) printk(UM_KERN_ERR "wait_stub_done : failed to wait for SIGTRAP, " "pid = %d, n = %d, errno = %d, status = 0x%x\n", pid, n, errno, status); - if (bad_stop) - kill(pid, SIGKILL); - else - fatal_sigsegv(); + fatal_sigsegv(); } extern unsigned long current_stub_stack(void); From 284e6d39516cc7f9fbceebb259849fcb41559a7b Mon Sep 17 00:00:00 2001 From: Richard Weinberger Date: Sun, 20 Jul 2014 13:09:15 +0200 Subject: [PATCH 265/268] um: Ensure that a stub page cannot get unmapped MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Trinity discovered an execution path such that a task can unmap his stub page. Reported-by: Toralf Förster Signed-off-by: Richard Weinberger --- arch/um/kernel/tlb.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/arch/um/kernel/tlb.c b/arch/um/kernel/tlb.c index 9472079471bbf..1fc619e5dfe9e 100644 --- a/arch/um/kernel/tlb.c +++ b/arch/um/kernel/tlb.c @@ -124,6 +124,9 @@ static int add_munmap(unsigned long addr, unsigned long len, struct host_vm_op *last; int ret = 0; + if ((addr >= STUB_START) && (addr < STUB_END)) + return -EINVAL; + if (hvc->index != 0) { last = &hvc->ops[hvc->index - 1]; if ((last->type == MUNMAP) && From 468f65976a8d065ee1f27782337f4ee85a9151c5 Mon Sep 17 00:00:00 2001 From: Richard Weinberger Date: Sun, 20 Jul 2014 13:16:20 +0200 Subject: [PATCH 266/268] um: Fix hung task in fix_range_common() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit If do_ops() fails we have to release current->mm->mmap_sem otherwise the failing task will never terminate. Reported-by: Toralf Förster Signed-off-by: Richard Weinberger --- arch/um/kernel/tlb.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/arch/um/kernel/tlb.c b/arch/um/kernel/tlb.c index 1fc619e5dfe9e..f1b3eb14b855c 100644 --- a/arch/um/kernel/tlb.c +++ b/arch/um/kernel/tlb.c @@ -12,6 +12,7 @@ #include #include #include +#include struct host_vm_change { struct host_vm_op { @@ -286,8 +287,11 @@ void fix_range_common(struct mm_struct *mm, unsigned long start_addr, /* This is not an else because ret is modified above */ if (ret) { printk(KERN_ERR "fix_range_common: failed, killing current " - "process\n"); + "process: %d\n", task_tgid_vnr(current)); + /* We are under mmap_sem, release it such that current can terminate */ + up_write(¤t->mm->mmap_sem); force_sig(SIGKILL, current); + do_signal(); } } From bb6a1b2e189f797c0e4a116aec7ce77c344f11e0 Mon Sep 17 00:00:00 2001 From: Richard Weinberger Date: Sun, 20 Jul 2014 13:39:27 +0200 Subject: [PATCH 267/268] um: segv: Save regs only in case of a kernel mode fault ...otherwise me lose user mode regs and the resulting stack trace is useless. Signed-off-by: Richard Weinberger --- arch/um/kernel/trap.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/um/kernel/trap.c b/arch/um/kernel/trap.c index 974b87474a990..5678c3571e7cb 100644 --- a/arch/um/kernel/trap.c +++ b/arch/um/kernel/trap.c @@ -206,7 +206,7 @@ unsigned long segv(struct faultinfo fi, unsigned long ip, int is_user, int is_write = FAULT_WRITE(fi); unsigned long address = FAULT_ADDRESS(fi); - if (regs) + if (!is_user && regs) current->thread.segv_regs = container_of(regs, struct pt_regs, regs); if (!is_user && (address >= start_vm) && (address < end_vm)) { From 9a3c4145af32125c5ee39c0272662b47307a8323 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sun, 20 Jul 2014 21:04:16 -0700 Subject: [PATCH 268/268] Linux 3.16-rc6 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index f3c543df4697f..6b2774145d664 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,7 @@ VERSION = 3 PATCHLEVEL = 16 SUBLEVEL = 0 -EXTRAVERSION = -rc5 +EXTRAVERSION = -rc6 NAME = Shuffling Zombie Juror # *DOCUMENTATION*